etlplus 0.12.12__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +2 -2
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +2 -2
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +36 -20
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +7 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +2 -2
- etlplus/cli/handlers.py +19 -9
- etlplus/config/README.md +31 -33
- etlplus/config/__init__.py +9 -32
- etlplus/config/types.py +0 -64
- etlplus/dag.py +103 -0
- etlplus/database/README.md +2 -2
- etlplus/enums.py +0 -32
- etlplus/file/README.md +2 -2
- etlplus/file/enums.py +1 -1
- etlplus/{validation → ops}/README.md +2 -2
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +78 -94
- etlplus/{load.py → ops/load.py} +73 -93
- etlplus/{run.py → ops/run.py} +153 -118
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +80 -15
- etlplus/{validate.py → ops/validate.py} +19 -9
- etlplus/templates/README.md +2 -2
- etlplus/types.py +2 -2
- etlplus/workflow/README.md +52 -0
- etlplus/workflow/__init__.py +43 -0
- etlplus/{config → workflow}/connector.py +17 -16
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +31 -15
- etlplus/{config → workflow}/pipeline.py +11 -3
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/METADATA +91 -60
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/RECORD +49 -43
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/WHEEL +1 -1
- etlplus/validation/__init__.py +0 -44
- /etlplus/{config → workflow}/utils.py +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/entry_points.txt +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/top_level.txt +0 -0

etlplus/{validate.py → ops/validate.py}
RENAMED

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.ops.validate` module.
 
 Validate dicts and lists of dicts using simple, schema-like rules.
 
@@ -34,11 +34,11 @@ from typing import Final
 from typing import Literal
 from typing import TypedDict
 
+from ..types import JSONData
+from ..types import Record
+from ..types import StrAnyMap
+from ..types import StrPath
 from .load import load_data
-from .types import JSONData
-from .types import Record
-from .types import StrAnyMap
-from .types import StrPath
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -279,11 +279,15 @@ def _type_matches(
     bool
         ``True`` if the value matches the expected type; ``False`` if not.
     """
-
-
-
+    if expected == 'number':
+        return _is_number(value)
+    if expected == 'integer':
+        return isinstance(value, int) and not isinstance(value, bool)
+    if expected == 'boolean':
+        return isinstance(value, bool)
 
-
+    py_type = TYPE_MAP.get(expected)
+    return isinstance(value, py_type) if py_type else False
 
 
 def _validate_record(
@@ -330,6 +334,9 @@ def _validate_record(
 # SECTION: FUNCTIONS ======================================================== #
 
 
+# -- Helpers -- #
+
+
 def validate_field(
     value: Any,
     rules: StrAnyMap | FieldRules,
@@ -425,6 +432,9 @@ def validate_field(
     return {'valid': len(errors) == 0, 'errors': errors}
 
 
+# -- Orchestration -- #
+
+
 def validate(
     source: StrPath | JSONData,
     rules: RulesMap | None = None,
```
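The rewritten `_type_matches` above is what `validate_field` leans on for type rules. A minimal sketch of the behaviour, assuming a rules mapping with a `'type'` key (the full `FieldRules` schema is not part of this hunk):

```python
# Minimal sketch, assuming a {'type': ...} rule is accepted by validate_field;
# the exact FieldRules keys are not shown in this diff.
from etlplus.ops.validate import validate_field

print(validate_field(3.14, {'type': 'number'}))   # expected: {'valid': True, 'errors': []}
print(validate_field(True, {'type': 'integer'}))  # bools are rejected for 'integer'
print(validate_field('yes', {'type': 'boolean'})) # expected to report a type error
```

The `valid`/`errors` result shape matches the `return {'valid': len(errors) == 0, 'errors': errors}` context line above.
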
etlplus/templates/README.md
CHANGED

```diff
@@ -1,4 +1,4 @@
-# etlplus.templates
+# `etlplus.templates` Subpackage
 
 Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpers.
 
@@ -8,7 +8,7 @@ Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpe
 
 Back to project overview: see the top-level [README](../../README.md).
 
-- [etlplus.templates
+- [`etlplus.templates` Subpackage](#etlplus-templates-subpackage)
 - [Available Templates](#available-templates)
 - [Rendering Templates](#rendering-templates)
 - [Example: Rendering a DDL Template](#example-rendering-a-ddl-template)
```
etlplus/types.py
CHANGED

```diff
@@ -193,8 +193,8 @@ type AggregateSpec = StrAnyMap
 
 # -- Pipelines-- #
 
-# Unified pipeline step spec consumed by :mod:`etlplus.transform`.
-type StepSpec = FilterSpec | MapSpec | SelectSpec | SortSpec
+# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
+type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
 
 # Collections of steps
 
```
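With `AggregateSpec` now part of the union, a transform pipeline can mix aggregate steps with the existing kinds. A purely illustrative sketch; the dict keys below are hypothetical, since the individual spec shapes are all `StrAnyMap` aliases defined elsewhere in `etlplus.types`:

```python
from etlplus.types import StepSpec

# Hypothetical step mappings; only the StepSpec union itself comes from the diff.
steps: list[StepSpec] = [
    {'filter': {'field': 'status', 'equals': 'active'}},
    {'aggregate': {'group_by': ['region'], 'metric': 'sum(amount)'}},
]
print(len(steps))
```
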

etlplus/workflow/README.md
ADDED

````diff
@@ -0,0 +1,52 @@
+# `etlplus.workflow` Subpackage
+
+Documentation for the `etlplus.workflow` subpackage: configuration helpers for connectors,
+pipelines, jobs, and profiles.
+
+- Provides classes and utilities for managing ETL pipeline configuration
+- Supports YAML/JSON config loading and validation
+- Includes helpers for connectors, jobs, pipelines, and profiles
+- Exposes type definitions for config schemas
+
+Back to project overview: see the top-level [README](../../README.md).
+
+- [`etlplus.workflow` Subpackage](#etlplusworkflow-subpackage)
+- [Supported Configuration Types](#supported-configuration-types)
+- [Loading and Validating Configs](#loading-and-validating-configs)
+- [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
+- [See Also](#see-also)
+
+## Supported Configuration Types
+
+- **Connector**: Connection details for databases, files, or APIs
+- **Job**: ETL job definitions and scheduling
+- **Pipeline**: End-to-end pipeline configuration
+- **Profile**: User or environment-specific settings
+
+## Loading and Validating Configs
+
+Use the provided classes to load and validate configuration files:
+
+```python
+from etlplus.workflow import PipelineConfig
+
+cfg = PipelineConfig.from_yaml("pipeline.yml")
+```
+
+- Supports YAML and JSON formats
+- Validates against expected schema
+
+## Example: Loading a Pipeline Config
+
+```python
+from etlplus.workflow import PipelineConfig
+
+pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
+print(pipeline)
+```
+
+## See Also
+
+- Top-level CLI and library usage in the main [README](../../README.md)
+- Config type definitions in [types.py](types.py)
+- Config utilities in [utils.py](utils.py)
````

etlplus/workflow/__init__.py
ADDED

```diff
@@ -0,0 +1,43 @@
+"""
+:mod:`etlplus.workflow` package.
+
+Job workflow helpers.
+"""
+
+from __future__ import annotations
+
+from .connector import Connector
+from .connector import ConnectorApi
+from .connector import ConnectorDb
+from .connector import ConnectorFile
+from .connector import parse_connector
+from .dag import topological_sort_jobs
+from .jobs import ExtractRef
+from .jobs import JobConfig
+from .jobs import LoadRef
+from .jobs import TransformRef
+from .jobs import ValidationRef
+from .pipeline import PipelineConfig
+from .pipeline import load_pipeline_config
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Data Classes
+    'ConnectorApi',
+    'ConnectorDb',
+    'ConnectorFile',
+    'ExtractRef',
+    'JobConfig',
+    'LoadRef',
+    'PipelineConfig',
+    'TransformRef',
+    'ValidationRef',
+    # Functions
+    'load_pipeline_config',
+    'parse_connector',
+    'topological_sort_jobs',
+    # Type Aliases
+    'Connector',
+]
```
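Because the new `__init__` re-exports the public API, callers can import workflow helpers from the subpackage root rather than the old `etlplus.config` paths; for example:

```python
from etlplus.workflow import JobConfig, parse_connector, topological_sort_jobs

job = JobConfig(name='nightly_sync')
print(job.depends_on)  # [] by default (see the jobs.py changes below)
```
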

etlplus/{config → workflow}/connector.py
RENAMED

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.connector` module.
 
 A module defining configuration types for data source/target connectors in ETL
 pipelines. A "connector" is any I/O endpoint:
@@ -11,18 +11,19 @@ pipelines. A "connector" is any I/O endpoint:
 
 Examples
 --------
-- Use
-
-- Use the
-
-
+- Use :class:`ConnectorApi`/:class:`ConnectorFile`/:class:`ConnectorDb` when
+  you want the concrete dataclasses.
+- Use the :class:`Connector` union for typing a value that can be any
+  connector.
+- Use :func:`parse_connector(obj)` to construct a connector instance from a
+  generic mapping that includes a *type* key.
 
 Notes
 -----
 - TypedDict shapes are editor hints; runtime parsing remains permissive
-
+  (from_obj accepts Mapping[str, Any]).
 - TypedDicts referenced in :mod:`etlplus.config.types` remain editor hints.
-
+  Runtime parsing stays permissive and tolerant.
 
 See Also
 --------
@@ -59,7 +60,7 @@ if TYPE_CHECKING: # Editor-only typing hints to avoid runtime imports
 
 
 __all__ = [
-    # Classes
+    # Data Classes
     'ConnectorApi',
     'ConnectorDb',
     'ConnectorFile',
@@ -83,12 +84,12 @@ class ConnectorApi:
     name : str
         Unique connector name.
     type : ConnectorType
-        Connector kind literal, always ``
+        Connector kind literal, always ``'api'``.
     url : str | None
        Direct absolute URL (when not using ``service``/``endpoint`` refs).
     method : str | None
         Optional HTTP method; typically omitted for sources (defaults to
-        GET) and used for targets (e.g., ``
+        GET) and used for targets (e.g., ``'post'``).
     headers : dict[str, str]
         Additional request headers.
     query_params : dict[str, Any]
@@ -111,7 +112,7 @@ class ConnectorApi:
 
     # Direct form
     url: str | None = None
-    # Optional HTTP method; typically omitted for sources (defaults to GET
+    # Optional HTTP method; typically omitted for sources (defaults to GET)
     # at runtime) and used for targets (e.g., 'post', 'put').
     method: str | None = None
     headers: dict[str, str] = field(default_factory=dict)
@@ -185,7 +186,7 @@ class ConnectorDb:
     name : str
         Unique connector name.
     type : ConnectorType
-        Connector kind literal, always ``
+        Connector kind literal, always ``'database'``.
     connection_string : str | None
         Connection string/DSN for the database.
     query : str | None
@@ -193,7 +194,7 @@ class ConnectorDb:
     table : str | None
         Target/source table name (optional).
     mode : str | None
-        Load mode hint (e.g., ``
+        Load mode hint (e.g., ``'append'``, ``'replace'``) — future use.
     """
 
     # -- Attributes -- #
@@ -262,9 +263,9 @@ class ConnectorFile:
     name : str
         Unique connector name.
     type : ConnectorType
-        Connector kind literal, always ``
+        Connector kind literal, always ``'file'``.
     format : str | None
-        File format (e.g., ``
+        File format (e.g., ``'json'``, ``'csv'``).
     path : str | None
         File path or URI.
     options : dict[str, Any]
```
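As the updated docstrings describe, `parse_connector` picks the concrete dataclass from the mapping's `type` key. A hedged sketch using only field names documented above (`name`, `type`, `format`, `path`):

```python
from etlplus.workflow import parse_connector

raw = {
    'name': 'users_csv',
    'type': 'file',     # selects ConnectorFile
    'format': 'csv',
    'path': 'data/users.csv',
}
connector = parse_connector(raw)
print(type(connector).__name__)  # expected: ConnectorFile
```
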
etlplus/workflow/dag.py
ADDED

```diff
@@ -0,0 +1,105 @@
+"""
+:mod:`etlplus.workflow.dag` module.
+
+Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
+:attr:`depends_on`.
+"""
+
+from __future__ import annotations
+
+from collections import deque
+from dataclasses import dataclass
+
+from .jobs import JobConfig
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Errors
+    'DagError',
+    # Functions
+    'topological_sort_jobs',
+]
+
+
+# SECTION: ERRORS =========================================================== #
+
+
+@dataclass(slots=True)
+class DagError(ValueError):
+    """
+    Raised when the job dependency graph is invalid.
+
+    Attributes
+    ----------
+    message : str
+        Error message.
+    """
+
+    # -- Attributes -- #
+
+    message: str
+
+    # -- Magic Methods (Object Representation) -- #
+
+    def __str__(self) -> str:
+        return self.message
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def topological_sort_jobs(
+    jobs: list[JobConfig],
+) -> list[JobConfig]:
+    """
+    Return jobs in topological order based on :attr:`depends_on`.
+
+    Parameters
+    ----------
+    jobs : list[JobConfig]
+        List of job configurations to sort.
+
+    Returns
+    -------
+    list[JobConfig]
+        Jobs sorted in topological order.
+
+    Raises
+    ------
+    DagError
+        If a dependency is missing, self-referential, or when a cycle is
+        detected.
+    """
+    index = {job.name: job for job in jobs}
+    edges: dict[str, set[str]] = {name: set() for name in index}
+    indegree: dict[str, int] = {name: 0 for name in index}
+
+    for job in jobs:
+        for dep in job.depends_on:
+            if dep not in index:
+                raise DagError(
+                    f'Unknown dependency "{dep}" in job "{job.name}"',
+                )
+            if dep == job.name:
+                raise DagError(f'Job "{job.name}" depends on itself')
+            if job.name not in edges[dep]:
+                edges[dep].add(job.name)
+                indegree[job.name] += 1
+
+    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
+    ordered: list[str] = []
+
+    while queue:
+        name = queue.popleft()
+        ordered.append(name)
+        for child in sorted(edges[name]):
+            indegree[child] -= 1
+            if indegree[child] == 0:
+                queue.append(child)
+
+    if len(ordered) != len(jobs):
+        raise DagError('Dependency cycle detected')
+
+    return [index[name] for name in ordered]
```
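A short usage sketch for the new helper, built only from fields shown in this diff (`name`, `depends_on`):

```python
from etlplus.workflow import JobConfig, topological_sort_jobs
from etlplus.workflow.dag import DagError

jobs = [
    JobConfig(name='load', depends_on=['transform']),
    JobConfig(name='extract'),
    JobConfig(name='transform', depends_on=['extract']),
]
print([job.name for job in topological_sort_jobs(jobs)])
# ['extract', 'transform', 'load']

try:
    topological_sort_jobs([JobConfig(name='a', depends_on=['a'])])
except DagError as exc:
    print(exc)  # Job "a" depends on itself
```

Ties are broken alphabetically because ready jobs are pulled from a sorted deque, so the ordering is deterministic for a given config.
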

etlplus/{config → workflow}/jobs.py
RENAMED

```diff
@@ -1,12 +1,12 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.jobs` module.
 
 Data classes modeling job orchestration references (extract, validate,
 transform, load).
 
 Notes
 -----
-- Lightweight references used inside
+- Lightweight references used inside :class:`PipelineConfig` to avoid storing
   large nested structures.
 - All attributes are simple and optional where appropriate, keeping parsing
   tolerant.
@@ -26,6 +26,7 @@ from ..utils import maybe_mapping
 
 
 __all__ = [
+    # Data Classes
     'ExtractRef',
     'JobConfig',
     'LoadRef',
@@ -34,10 +35,7 @@ __all__ = [
 ]
 
 
-# SECTION:
-
-
-# SECTION: CLASSES ========================================================== #
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -65,12 +63,13 @@ class ExtractRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into an :class:`ExtractRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`source` and optional :attr:`options`.
 
         Returns
         -------
@@ -100,6 +99,8 @@ class JobConfig:
         Unique job name.
     description : str | None
         Optional human-friendly description.
+    depends_on : list[str]
+        Optional job dependency list. Dependencies must refer to other jobs.
     extract : ExtractRef | None
         Extraction reference.
     validate : ValidationRef | None
@@ -114,6 +115,7 @@ class JobConfig:
 
     name: str
     description: str | None = None
+    depends_on: list[str] = field(default_factory=list)
     extract: ExtractRef | None = None
     validate: ValidationRef | None = None
     transform: TransformRef | None = None
@@ -126,7 +128,8 @@ class JobConfig:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`JobConfig` instance.
 
         Parameters
         ----------
@@ -149,9 +152,19 @@ class JobConfig:
         if description is not None and not isinstance(description, str):
             description = str(description)
 
+        depends_raw = data.get('depends_on')
+        depends_on: list[str] = []
+        if isinstance(depends_raw, str):
+            depends_on = [depends_raw]
+        elif isinstance(depends_raw, list):
+            for entry in depends_raw:
+                if isinstance(entry, str):
+                    depends_on.append(entry)
+
         return cls(
             name=name,
             description=description,
+            depends_on=depends_on,
             extract=ExtractRef.from_obj(data.get('extract')),
             validate=ValidationRef.from_obj(data.get('validate')),
             transform=TransformRef.from_obj(data.get('transform')),
@@ -184,12 +197,13 @@ class LoadRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`LoadRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`target` and optional :attr:`overrides`.
 
         Returns
         -------
@@ -230,12 +244,13 @@ class TransformRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`TransformRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`pipeline`.
 
         Returns
         -------
@@ -280,12 +295,13 @@ class ValidationRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`ValidationRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`ruleset` plus optional metadata.
 
         Returns
         -------
```
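The new `depends_on` handling in `JobConfig.from_obj` accepts either a single string or a list and drops non-string entries; a minimal sketch, assuming the mapping carries at least a `name` key:

```python
from etlplus.workflow import JobConfig

job = JobConfig.from_obj({'name': 'load_users', 'depends_on': 'extract_users'})
print(job.depends_on)  # ['extract_users']

job = JobConfig.from_obj({'name': 'report', 'depends_on': ['load_users', 42]})
print(job.depends_on)  # ['load_users'] (the non-string entry is dropped)
```
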

etlplus/{config → workflow}/pipeline.py
RENAMED

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.pipeline` module.
 
 Pipeline configuration model and helpers for job orchestration.
 
@@ -38,7 +38,15 @@ from .utils import deep_substitute
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = [
+__all__ = [
+    # Data Classes
+    'PipelineConfig',
+    # Functions
+    'load_pipeline_config',
+]
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
 def _build_jobs(
@@ -156,7 +164,7 @@ def load_pipeline_config(
     return PipelineConfig.from_yaml(path, substitute=substitute, env=env)
 
 
-# SECTION: CLASSES
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
```
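`load_pipeline_config` stays a thin wrapper over `PipelineConfig.from_yaml`, as the context line in the last hunk shows; a hedged sketch with parameter names inferred from that call:

```python
from etlplus.workflow import load_pipeline_config

# substitute/env mirror the PipelineConfig.from_yaml(path, substitute=..., env=...)
# call shown above; the env mapping here is illustrative.
cfg = load_pipeline_config('configs/pipeline.yml', substitute=True, env={'ENV': 'dev'})
print(cfg)
```
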

etlplus/{config → workflow}/profile.py
RENAMED

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.profile` module.
 
 Profile model for pipeline-level defaults and environment.
 
@@ -22,10 +22,13 @@ from ..utils import cast_str_dict
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = [
+__all__ = [
+    # Data Classes
+    'ProfileConfig',
+]
 
 
-# SECTION: CLASSES
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -53,7 +56,7 @@ class ProfileConfig:
         cls,
         obj: StrAnyMap | None,
     ) -> Self:
-        """Parse a mapping into a
+        """Parse a mapping into a :class:`ProfileConfig` instance.
 
         Parameters
         ----------
@@ -64,7 +67,7 @@ class ProfileConfig:
         -------
         Self
             Parsed profile configuration; non-mapping input yields a default
-            instance. All
+            instance. All :attr:`env` values are coerced to strings.
         """
         if not isinstance(obj, Mapping):
             return cls()
```
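Per the updated docstring, non-mapping input yields a default `ProfileConfig` and `env` values are coerced to strings; a small sketch (the `env` attribute name comes from the docstring, not from code shown here):

```python
from etlplus.workflow.profile import ProfileConfig

profile = ProfileConfig.from_obj({'env': {'PORT': 5432}})
print(profile.env)  # expected: {'PORT': '5432'}

print(ProfileConfig.from_obj(None))  # falls back to a default instance
```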
|