PyPI - etlplus - Versions diffs - 0.4.7__py3-none-any.whl → 0.8.3__py3-none-any.whl - Mend

etlplus 0.4.7__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

etlplus/api/README.md +24 -26
etlplus/cli/commands.py +870 -0
etlplus/cli/constants.py +65 -0
etlplus/cli/handlers.py +426 -434
etlplus/cli/io.py +320 -0
etlplus/cli/main.py +14 -367
etlplus/cli/options.py +49 -0
etlplus/cli/state.py +335 -0
etlplus/cli/types.py +33 -0
etlplus/config/pipeline.py +11 -0
etlplus/database/__init__.py +44 -0
etlplus/database/ddl.py +319 -0
etlplus/database/engine.py +151 -0
etlplus/database/orm.py +354 -0
etlplus/database/schema.py +274 -0
etlplus/database/types.py +33 -0
etlplus/run.py +2 -4
etlplus/templates/__init__.py +5 -0
etlplus/templates/ddl.sql.j2 +128 -0
etlplus/templates/view.sql.j2 +69 -0
etlplus/types.py +5 -0
etlplus/utils.py +0 -31
{etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/METADATA +66 -1
{etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/RECORD +28 -14
etlplus/cli/app.py +0 -1239
{etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/WHEEL +0 -0
{etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/entry_points.txt +0 -0
{etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/licenses/LICENSE +0 -0
{etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/top_level.txt +0 -0

etlplus/cli/handlers.py CHANGED Viewed

@@ -6,124 +6,120 @@ Command handler functions for the ``etlplus`` command-line interface (CLI).
 from __future__ import annotations
-import argparse
-import csv
-import io
-import json
 import os
 import sys
+from collections.abc import Mapping
 from pathlib import Path
 from typing import Any
+from typing import Literal
 from typing import cast
 from ..config import PipelineConfig
 from ..config import load_pipeline_config
-from ..enums import FileFormat
+from ..database import load_table_spec
+from ..database import render_tables
 from ..extract import extract
 from ..file import File
 from ..load import load
 from ..run import run
 from ..transform import transform
 from ..types import JSONData
-from ..utils import json_type
-from ..utils import print_json
+from ..types import TemplateKey
+from ..validate import FieldRules
 from ..validate import validate
+from . import io as cli_io
 # SECTION: EXPORTS ========================================================== #
 __all__ = [
     # Functions
-    'cmd_extract',
-    'cmd_list',
-    'cmd_load',
-    'cmd_pipeline',
-    'cmd_run',
-    'cmd_transform',
-    'cmd_validate',
+    'extract_handler',
+    'check_handler',
+    'load_handler',
+    'render_handler',
+    'run_handler',
+    'transform_handler',
+    'validate_handler',
 ]
 # SECTION: INTERNAL FUNCTIONS =============================================== #
-def _emit_json(
-    data: Any,
-    *,
-    pretty: bool,
-) -> None:
+def _collect_table_specs(
+    config_path: str | None,
+    spec_path: str | None,
+) -> list[dict[str, Any]]:
     """
-    Emit JSON to stdout honoring the pretty/compact preference.
+    Load table schemas from a pipeline config and/or standalone spec.
     Parameters
     ----------
-    data : Any
-        Arbitrary JSON-serializable payload.
-    pretty : bool
-        When ``True`` pretty-print via :func:`print_json`; otherwise emit a
-        compact JSON string.
-    """
-    if pretty:
-        print_json(data)
-        return
-    dumped = json.dumps(
-        data,
-        ensure_ascii=False,
-        separators=(',', ':'),
-    )
-    print(dumped)
-def _infer_payload_format(
-    text: str,
-) -> str:
-    """
-    Infer JSON vs CSV from payload text.
-    Parameters
-    ----------
-    text : str
-        Incoming payload as plain text.
+    config_path : str | None
+        Path to a pipeline YAML config file.
+    spec_path : str | None
+        Path to a standalone table spec file.
     Returns
     -------
-    str
-        ``'json'`` when the text starts with ``{``/``[``, else ``'csv'``.
+    list[dict[str, Any]]
+        Collected table specification mappings.
     """
-    stripped = text.lstrip()
-    if stripped.startswith('{') or stripped.startswith('['):
-        return 'json'
-    return 'csv'
+    specs: list[dict[str, Any]] = []
+    if spec_path:
+        specs.append(dict(load_table_spec(Path(spec_path))))
+    if config_path:
+        cfg = load_pipeline_config(config_path, substitute=True)
+        specs.extend(getattr(cfg, 'table_schemas', []))
+    return specs
-def _list_sections(
+def _check_sections(
     cfg: PipelineConfig,
-    args: argparse.Namespace,
+    *,
+    jobs: bool,
+    pipelines: bool,
+    sources: bool,
+    targets: bool,
+    transforms: bool,
 ) -> dict[str, Any]:
     """
-    Build sectioned metadata output for the list command.
+    Build sectioned metadata output for the check command.
     Parameters
     ----------
     cfg : PipelineConfig
         The loaded pipeline configuration.
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    jobs : bool
+        Whether to include job metadata.
+    pipelines : bool
+        Whether to include pipeline metadata.
+    sources : bool
+        Whether to include source metadata.
+    targets : bool
+        Whether to include target metadata.
+    transforms : bool
+        Whether to include transform metadata.
     Returns
     -------
     dict[str, Any]
-        Metadata output for the list command.
+        Metadata output for the check command.
     """
     sections: dict[str, Any] = {}
-    if getattr(args, 'pipelines', False):
+    if jobs:
+        sections['jobs'] = _pipeline_summary(cfg)['jobs']
+    if pipelines:
         sections['pipelines'] = [cfg.name]
-    if getattr(args, 'sources', False):
+    if sources:
         sections['sources'] = [src.name for src in cfg.sources]
-    if getattr(args, 'targets', False):
+    if targets:
         sections['targets'] = [tgt.name for tgt in cfg.targets]
-    if getattr(args, 'transforms', False):
+    if transforms:
         sections['transforms'] = [
             getattr(trf, 'name', None) for trf in cfg.transforms
         ]
@@ -132,106 +128,6 @@ def _list_sections(
     return sections
-def _explicit_cli_format(
-    args: argparse.Namespace,
-) -> str | None:
-    """Return the explicit CLI format hint when provided."""
-    if not getattr(args, '_format_explicit', False):
-        return None
-    for attr in ('format', 'target_format', 'source_format'):
-        value = getattr(args, attr, None)
-        if value is None:
-            continue
-        normalized = value.strip().lower()
-        if normalized:
-            return normalized
-    return None
-def _materialize_file_payload(
-    source: object,
-    *,
-    format_hint: str | None,
-    format_explicit: bool,
-) -> JSONData | object:
-    """
-    Return structured payloads when ``source`` references a file.
-    Parameters
-    ----------
-    source : object
-        Input source of data, possibly a file path.
-    format_hint : str | None
-        Explicit format hint: 'json', 'csv', or None to infer.
-    format_explicit : bool
-        Whether an explicit format hint was provided.
-    Returns
-    -------
-    JSONData | object
-        Parsed JSON data when ``source`` is a file; otherwise the original
-        ``source`` object.
-    """
-    if isinstance(source, (dict, list)):
-        return cast(JSONData, source)
-    if not isinstance(source, (str, os.PathLike)):
-        return source
-    path = Path(source)
-    normalized_hint = (format_hint or '').strip().lower()
-    fmt: FileFormat | None = None
-    if format_explicit and normalized_hint:
-        try:
-            fmt = FileFormat(normalized_hint)
-        except ValueError:
-            fmt = None
-    elif not format_explicit:
-        suffix = path.suffix.lower().lstrip('.')
-        if suffix:
-            try:
-                fmt = FileFormat(suffix)
-            except ValueError:
-                fmt = None
-    if fmt is None:
-        return source
-    if fmt == FileFormat.CSV:
-        return _read_csv_rows(path)
-    return File(path, fmt).read()
-def _parse_text_payload(
-    text: str,
-    fmt: str | None,
-) -> JSONData | str:
-    """
-    Parse JSON/CSV text into a Python payload.
-    Parameters
-    ----------
-    text : str
-        The input text payload.
-    fmt : str | None
-        Explicit format hint: 'json', 'csv', or None to infer.
-    Returns
-    -------
-    JSONData | str
-        The parsed payload as JSON data or raw text.
-    """
-    effective = (fmt or '').strip().lower() or _infer_payload_format(text)
-    if effective == 'json':
-        return cast(JSONData, json_type(text))
-    if effective == 'csv':
-        reader = csv.DictReader(io.StringIO(text))
-        return [dict(row) for row in reader]
-    return text
 def _pipeline_summary(
     cfg: PipelineConfig,
 ) -> dict[str, Any]:
@@ -260,406 +156,502 @@ def _pipeline_summary(
     }
-def _presentation_flags(
-    args: argparse.Namespace,
-) -> tuple[bool, bool]:
-    """Return presentation toggles from the parsed namespace.
-    Parameters
-    ----------
-    args : argparse.Namespace
-        Namespace produced by the CLI parser.
-    Returns
-    -------
-    tuple[bool, bool]
-        Pair of ``(pretty, quiet)`` flags with safe defaults.
-    """
-    return getattr(args, 'pretty', True), getattr(args, 'quiet', False)
-def _read_csv_rows(
-    path: Path,
-) -> list[dict[str, str]]:
-    """
-    Read CSV rows into dictionaries.
-    Parameters
-    ----------
-    path : Path
-        Path to a CSV file.
-    Returns
-    -------
-    list[dict[str, str]]
-        List of dictionaries, each representing a row in the CSV file.
-    """
-    with path.open(newline='', encoding='utf-8') as handle:
-        reader = csv.DictReader(handle)
-        return [dict(row) for row in reader]
-def _read_stdin_text() -> str:
-    """
-    Return every character from ``stdin`` as a single string.
-    Returns
-    -------
-    str
-        Entire ``stdin`` contents.
-    """
-    return sys.stdin.read()
+# SECTION: FUNCTIONS ======================================================== #
-def _resolve_cli_payload(
-    source: object,
+def check_handler(
     *,
-    format_hint: str | None,
-    format_explicit: bool,
-    hydrate_files: bool = True,
-) -> JSONData | object:
+    config: str,
+    jobs: bool = False,
+    pipelines: bool = False,
+    sources: bool = False,
+    summary: bool = False,
+    targets: bool = False,
+    transforms: bool = False,
+    substitute: bool = True,
+    pretty: bool = True,
+) -> int:
     """
-    Normalize CLI-provided payloads, honoring stdin and inline data.
+    Print requested pipeline sections from a YAML configuration.
     Parameters
     ----------
-    source : object
-        Raw CLI value (path, inline payload, or ``'-'`` for stdin).
-    format_hint : str | None
-        Explicit format hint supplied by the CLI option.
-    format_explicit : bool
-        Flag indicating whether the format hint was explicitly provided.
-    hydrate_files : bool, optional
-        When ``True`` (default) materialize file paths into structured data.
-        When ``False``, keep the original path so downstream code can stream
-        from disk directly.
+    config : str
+        Path to the pipeline YAML configuration.
+    jobs : bool, optional
+        Whether to include job metadata. Default is ``False``.
+    pipelines : bool, optional
+        Whether to include pipeline metadata. Default is ``False``.
+    sources : bool, optional
+        Whether to include source metadata. Default is ``False``.
+    summary : bool, optional
+        Whether to print a full summary of the pipeline. Default is ``False``.
+    targets : bool, optional
+        Whether to include target metadata. Default is ``False``.
+    transforms : bool, optional
+        Whether to include transform metadata. Default is ``False``.
+    substitute : bool, optional
+        Whether to perform environment variable substitution. Default is
+        ``True``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
-    JSONData | object
-        Parsed payload or the original source value when hydration is
-        disabled.
-    """
-    if isinstance(source, (os.PathLike, str)) and str(source) == '-':
-        text = _read_stdin_text()
-        return _parse_text_payload(text, format_hint)
+    int
+        Zero on success.
-    if not hydrate_files:
-        return source
+    """
+    cfg = load_pipeline_config(config, substitute=substitute)
+    if summary:
+        cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
+        return 0
-    return _materialize_file_payload(
-        source,
-        format_hint=format_hint,
-        format_explicit=format_explicit,
+    cli_io.emit_json(
+        _check_sections(
+            cfg,
+            jobs=jobs,
+            pipelines=pipelines,
+            sources=sources,
+            targets=targets,
+            transforms=transforms,
+        ),
+        pretty=pretty,
     )
+    return 0
-def _write_json_output(
-    data: Any,
-    output_path: str | None,
+def extract_handler(
     *,
-    success_message: str,
-) -> bool:
-    """
-    Optionally persist JSON data to disk.
-    Parameters
-    ----------
-    data : Any
-        Data to write.
-    output_path : str | None
-        Path to write the output to. None to print to stdout.
-    success_message : str
-        Message to print upon successful write.
-    Returns
-    -------
-    bool
-        True if output was written to a file, False if printed to stdout.
-    """
-    if not output_path or output_path == '-':
-        return False
-    File(Path(output_path), FileFormat.JSON).write_json(data)
-    print(f'{success_message} {output_path}')
-    return True
-# SECTION: FUNCTIONS ======================================================== #
-def cmd_extract(
-    args: argparse.Namespace,
+    source_type: str,
+    source: str,
+    format_hint: str | None = None,
+    format_explicit: bool = False,
+    target: str | None = None,
+    output: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
     Extract data from a source.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    source_type : str
+        The type of the source (e.g., 'file', 'api', 'database').
+    source : str
+        The source identifier (e.g., path, URL, DSN).
+    format_hint : str | None, optional
+        An optional format hint (e.g., 'json', 'csv'). Default is ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    target : str | None, optional
+        The target destination (e.g., path, database). Default is ``None``.
+    output : str | None, optional
+        Path to write output data. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
     int
         Zero on success.
     """
-    pretty, _ = _presentation_flags(args)
-    explicit_format = _explicit_cli_format(args)
+    explicit_format = format_hint if format_explicit else None
-    if args.source == '-':
-        text = _read_stdin_text()
-        payload = _parse_text_payload(text, getattr(args, 'format', None))
-        _emit_json(payload, pretty=pretty)
+    if source == '-':
+        text = cli_io.read_stdin_text()
+        payload = cli_io.parse_text_payload(
+            text,
+            format_hint,
+        )
+        cli_io.emit_json(payload, pretty=pretty)
         return 0
     result = extract(
-        args.source_type,
-        args.source,
+        source_type,
+        source,
         file_format=explicit_format,
     )
-    output_path = getattr(args, 'target', None)
-    if output_path is None:
-        output_path = getattr(args, 'output', None)
+    output_path = target or output
-    if not _write_json_output(
+    cli_io.emit_or_write(
         result,
         output_path,
+        pretty=pretty,
         success_message='Data extracted and saved to',
-    ):
-        _emit_json(result, pretty=pretty)
+    )
     return 0
-def cmd_validate(
-    args: argparse.Namespace,
+def load_handler(
+    *,
+    source: str,
+    target_type: str,
+    target: str,
+    source_format: str | None = None,
+    target_format: str | None = None,
+    format_explicit: bool = False,
+    output: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
-    Validate data from a source.
+    Load data into a target.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    source : str
+        The source payload (e.g., path, inline data).
+    target_type : str
+        The type of the target (e.g., 'file', 'database').
+    target : str
+        The target destination (e.g., path, DSN).
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target_format : str | None, optional
+        An optional target format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    output : str | None, optional
+        Path to write output data. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
     int
         Zero on success.
     """
-    pretty, _quiet = _presentation_flags(args)
-    format_explicit: bool = getattr(args, '_format_explicit', False)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
+    explicit_format = target_format if format_explicit else None
+    # Allow piping into load.
+    source_value = cast(
+        str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
+        cli_io.resolve_cli_payload(
+            source,
+            format_hint=source_format,
+            format_explicit=source_format is not None,
+            hydrate_files=False,
         ),
     )
-    result = validate(payload, args.rules)
-    target_path = getattr(args, 'target', None)
-    if target_path:
-        validated_data = result.get('data')
-        if validated_data is not None:
-            _write_json_output(
-                validated_data,
-                target_path,
-                success_message='Validation result saved to',
-            )
-        else:
-            print(
-                f'Validation failed, no data to save for {target_path}',
-                file=sys.stderr,
-            )
-    else:
-        _emit_json(result, pretty=pretty)
+    # Allow piping out of load for file targets.
+    if target_type == 'file' and target == '-':
+        payload = cli_io.materialize_file_payload(
+            source_value,
+            format_hint=source_format,
+            format_explicit=source_format is not None,
+        )
+        cli_io.emit_json(payload, pretty=pretty)
+        return 0
+    result = load(
+        source_value,
+        target_type,
+        target,
+        file_format=explicit_format,
+    )
+    output_path = output
+    cli_io.emit_or_write(
+        result,
+        output_path,
+        pretty=pretty,
+        success_message='Load result saved to',
+    )
     return 0
-def cmd_transform(
-    args: argparse.Namespace,
+def render_handler(
+    *,
+    config: str | None = None,
+    spec: str | None = None,
+    table: str | None = None,
+    template: TemplateKey | None = None,
+    template_path: str | None = None,
+    output: str | None = None,
+    pretty: bool = True,
+    quiet: bool = False,
 ) -> int:
     """
-    Transform data from a source.
+    Render SQL DDL statements from table schema specs.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    config : str | None, optional
+        Path to a pipeline YAML configuration. Default is ``None``.
+    spec : str | None, optional
+        Path to a standalone table spec file. Default is ``None``.
+    table : str | None, optional
+        Table name filter. Default is ``None``.
+    template : TemplateKey | None, optional
+        The template key to use for rendering. Default is ``None``.
+    template_path : str | None, optional
+        Path to a custom template file. Default is ``None``.
+    output : str | None, optional
+        Path to write output SQL. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
+    quiet : bool, optional
+        Whether to suppress non-error output. Default is ``False``.
     Returns
     -------
     int
         Zero on success.
     """
-    pretty, _quiet = _presentation_flags(args)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    format_explicit: bool = format_hint is not None
+    template_value: TemplateKey = template or 'ddl'
+    template_path_override = template_path
+    table_filter = table
+    spec_path = spec
+    config_path = config
+    # If the provided template points to a file, treat it as a path override.
+    file_override = template_path_override
+    template_key: TemplateKey | None = template_value
+    if template_path_override is None:
+        candidate_path = Path(template_value)
+        if candidate_path.exists():
+            file_override = str(candidate_path)
+            template_key = None
+    specs = _collect_table_specs(config_path, spec_path)
+    if table_filter:
+        specs = [
+            spec
+            for spec in specs
+            if str(spec.get('table')) == table_filter
+            or str(spec.get('name', '')) == table_filter
+        ]
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
-        ),
-    )
+    if not specs:
+        target_desc = table_filter or 'table_schemas'
+        print(
+            'No table schemas found for '
+            f'{target_desc}. Provide --spec or a pipeline --config with '
+            'table_schemas.',
+            file=sys.stderr,
+        )
+        return 1
-    data = transform(payload, args.operations)
+    rendered_chunks = render_tables(
+        specs,
+        template=template_key,
+        template_path=file_override,
+    )
+    sql_text = (
+        '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
+    )
+    rendered_output = sql_text if pretty else sql_text.rstrip('\n')
-    if not _write_json_output(
-        data,
-        getattr(args, 'target', None),
-        success_message='Data transformed and saved to',
-    ):
-        _emit_json(data, pretty=pretty)
+    output_path = output
+    if output_path and output_path != '-':
+        Path(output_path).write_text(rendered_output, encoding='utf-8')
+        if not quiet:
+            print(f'Rendered {len(specs)} schema(s) to {output_path}')
+        return 0
+    print(rendered_output)
     return 0
-def cmd_load(
-    args: argparse.Namespace,
+def run_handler(
+    *,
+    config: str,
+    job: str | None = None,
+    pipeline: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
-    Load data into a target.
+    Execute an ETL job end-to-end from a pipeline YAML configuration.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    config : str
+        Path to the pipeline YAML configuration.
+    job : str | None, optional
+        Name of the job to run. If not provided, runs the entire pipeline.
+        Default is ``None``.
+    pipeline : str | None, optional
+        Alias for ``job``. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
     int
         Zero on success.
     """
-    pretty, _ = _presentation_flags(args)
-    explicit_format = _explicit_cli_format(args)
-    # Allow piping into load.
-    source_format = getattr(args, 'source_format', None)
-    source_value = cast(
-        str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
-        _resolve_cli_payload(
-            args.source,
-            format_hint=source_format,
-            format_explicit=source_format is not None,
-            hydrate_files=False,
-        ),
-    )
+    cfg = load_pipeline_config(config, substitute=True)
-    # Allow piping out of load for file targets.
-    if args.target_type == 'file' and args.target == '-':
-        payload = _materialize_file_payload(
-            source_value,
-            format_hint=source_format,
-            format_explicit=source_format is not None,
-        )
-        _emit_json(payload, pretty=pretty)
+    job_name = job or pipeline
+    if job_name:
+        result = run(job=job_name, config_path=config)
+        cli_io.emit_json({'status': 'ok', 'result': result}, pretty=pretty)
         return 0
-    result = load(
-        source_value,
-        args.target_type,
-        args.target,
-        file_format=explicit_format,
-    )
+    cli_io.emit_json(_pipeline_summary(cfg), pretty=pretty)
+    return 0
-    output_path = getattr(args, 'output', None)
-    if not _write_json_output(
-        result,
-        output_path,
-        success_message='Load result saved to',
-    ):
-        _emit_json(result, pretty=pretty)
-    return 0
+TransformOperations = Mapping[
+    Literal['filter', 'map', 'select', 'sort', 'aggregate'],
+    Any,
+]
-def cmd_pipeline(
-    args: argparse.Namespace,
+def transform_handler(
+    *,
+    source: str,
+    operations: JSONData | str,
+    target: str | None = None,
+    source_format: str | None = None,
+    target_format: str | None = None,
+    pretty: bool = True,
+    format_explicit: bool = False,
 ) -> int:
     """
-    Inspect or run a pipeline YAML configuration.
+    Transform data from a source.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    source : str
+        The source payload (e.g., path, inline data).
+    operations : JSONData | str
+        The transformation operations (inline JSON or path).
+    target : str | None, optional
+        The target destination (e.g., path). Default is ``None``.
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target_format : str | None, optional
+        An optional target format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
     Returns
     -------
     int
         Zero on success.
+    Raises
+    ------
+    ValueError
+        If the operations payload is not a mapping.
     """
-    cfg = load_pipeline_config(args.config, substitute=True)
+    format_hint: str | None = source_format
+    format_explicit = format_hint is not None or format_explicit
+    payload = cast(
+        JSONData | str,
+        cli_io.resolve_cli_payload(
+            source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
-    list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
-    run_target = (
-        getattr(args, 'run', None)
-        or getattr(args, 'job', None)
-        or getattr(args, 'pipeline', None)
+    operations_payload = cli_io.resolve_cli_payload(
+        operations,
+        format_hint=None,
+        format_explicit=format_explicit,
     )
+    if not isinstance(operations_payload, dict):
+        raise ValueError('operations must resolve to a mapping of transforms')
-    if list_flag and not run_target:
-        print_json({'jobs': _pipeline_summary(cfg)['jobs']})
-        return 0
+    data = transform(payload, cast(TransformOperations, operations_payload))
-    if run_target:
-        result = run(job=run_target, config_path=args.config)
-        print_json({'status': 'ok', 'result': result})
+    if target and target != '-':
+        File.write_file(target, data, file_format=target_format)
+        print(f'Data transformed and saved to {target}')
         return 0
-    print_json(_pipeline_summary(cfg))
+    cli_io.emit_json(data, pretty=pretty)
     return 0
-def cmd_list(args: argparse.Namespace) -> int:
+def validate_handler(
+    *,
+    source: str,
+    rules: JSONData | str,
+    source_format: str | None = None,
+    target: str | None = None,
+    format_explicit: bool = False,
+    pretty: bool = True,
+) -> int:
     """
-    Print requested pipeline sections from a YAML configuration.
+    Validate data from a source.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    source : str
+        The source payload (e.g., path, inline data).
+    rules : JSONData | str
+        The validation rules (inline JSON or path).
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target : str | None, optional
+        The target destination (e.g., path). Default is ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
     int
         Zero on success.
-    """
-    cfg = load_pipeline_config(args.config, substitute=True)
-    print_json(_list_sections(cfg, args))
-    return 0
-def cmd_run(args: argparse.Namespace) -> int:
+    Raises
+    ------
+    ValueError
+        If the rules payload is not a mapping.
     """
-    Execute an ETL job end-to-end from a pipeline YAML configuration.
+    format_hint: str | None = source_format
+    payload = cast(
+        JSONData | str,
+        cli_io.resolve_cli_payload(
+            source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
-    Parameters
-    ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    rules_payload = cli_io.resolve_cli_payload(
+        rules,
+        format_hint=None,
+        format_explicit=format_explicit,
+    )
+    if not isinstance(rules_payload, dict):
+        raise ValueError('rules must resolve to a mapping of field rules')
-    Returns
-    -------
-    int
-        Zero on success.
-    """
-    cfg = load_pipeline_config(args.config, substitute=True)
+    field_rules = cast(Mapping[str, FieldRules], rules_payload)
+    result = validate(payload, field_rules)
-    job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
-    if job_name:
-        result = run(job=job_name, config_path=args.config)
-        print_json({'status': 'ok', 'result': result})
-        return 0
+    target_path = target
+    if target_path:
+        validated_data = result.get('data')
+        if validated_data is not None:
+            cli_io.write_json_output(
+                validated_data,
+                target_path,
+                success_message='Validation result saved to',
+            )
+        else:
+            print(
+                f'Validation failed, no data to save for {target_path}',
+                file=sys.stderr,
+            )
+    else:
+        cli_io.emit_json(result, pretty=pretty)
-    print_json(_pipeline_summary(cfg))
     return 0

etlplus 0.4.7__py3-none-any.whl → 0.8.3__py3-none-any.whl

etlplus 0.4.7__py3-none-any.whl → 0.8.3__py3-none-any.whl