PyPI - etlplus - Versions diffs - 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl - Mend

etlplus 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

etlplus/api/README.md +24 -26
etlplus/cli/commands.py +361 -136
etlplus/cli/handlers.py +286 -82
etlplus/cli/io.py +29 -52
etlplus/cli/main.py +13 -331
etlplus/cli/options.py +2 -68
etlplus/cli/state.py +9 -85
etlplus/utils.py +0 -31
{etlplus-0.8.2.dist-info → etlplus-0.8.4.dist-info}/METADATA +5 -2
{etlplus-0.8.2.dist-info → etlplus-0.8.4.dist-info}/RECORD +14 -14
{etlplus-0.8.2.dist-info → etlplus-0.8.4.dist-info}/WHEEL +0 -0
{etlplus-0.8.2.dist-info → etlplus-0.8.4.dist-info}/entry_points.txt +0 -0
{etlplus-0.8.2.dist-info → etlplus-0.8.4.dist-info}/licenses/LICENSE +0 -0
{etlplus-0.8.2.dist-info → etlplus-0.8.4.dist-info}/top_level.txt +0 -0

etlplus/cli/handlers.py CHANGED Viewed

@@ -6,11 +6,12 @@ Command handler functions for the ``etlplus`` command-line interface (CLI).
 from __future__ import annotations
-import argparse
 import os
 import sys
+from collections.abc import Mapping
 from pathlib import Path
 from typing import Any
+from typing import Literal
 from typing import cast
 from ..config import PipelineConfig
@@ -18,11 +19,13 @@ from ..config import load_pipeline_config
 from ..database import load_table_spec
 from ..database import render_tables
 from ..extract import extract
+from ..file import File
 from ..load import load
 from ..run import run
 from ..transform import transform
 from ..types import JSONData
 from ..types import TemplateKey
+from ..validate import FieldRules
 from ..validate import validate
 from . import io as cli_io
@@ -77,7 +80,12 @@ def _collect_table_specs(
 def _check_sections(
     cfg: PipelineConfig,
-    args: argparse.Namespace,
+    *,
+    jobs: bool,
+    pipelines: bool,
+    sources: bool,
+    targets: bool,
+    transforms: bool,
 ) -> dict[str, Any]:
     """
     Build sectioned metadata output for the check command.
@@ -86,8 +94,16 @@ def _check_sections(
     ----------
     cfg : PipelineConfig
         The loaded pipeline configuration.
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    jobs : bool
+        Whether to include job metadata.
+    pipelines : bool
+        Whether to include pipeline metadata.
+    sources : bool
+        Whether to include source metadata.
+    targets : bool
+        Whether to include target metadata.
+    transforms : bool
+        Whether to include transform metadata.
     Returns
     -------
@@ -95,15 +111,15 @@ def _check_sections(
         Metadata output for the check command.
     """
     sections: dict[str, Any] = {}
-    if getattr(args, 'jobs', False):
+    if jobs:
         sections['jobs'] = _pipeline_summary(cfg)['jobs']
-    if getattr(args, 'pipelines', False):
+    if pipelines:
         sections['pipelines'] = [cfg.name]
-    if getattr(args, 'sources', False):
+    if sources:
         sections['sources'] = [src.name for src in cfg.sources]
-    if getattr(args, 'targets', False):
+    if targets:
         sections['targets'] = [tgt.name for tgt in cfg.targets]
-    if getattr(args, 'transforms', False):
+    if transforms:
         sections['transforms'] = [
             getattr(trf, 'name', None) for trf in cfg.transforms
         ]
@@ -144,67 +160,121 @@ def _pipeline_summary(
 def check_handler(
-    args: argparse.Namespace,
+    *,
+    config: str,
+    jobs: bool = False,
+    pipelines: bool = False,
+    sources: bool = False,
+    summary: bool = False,
+    targets: bool = False,
+    transforms: bool = False,
+    substitute: bool = True,
+    pretty: bool = True,
 ) -> int:
     """
     Print requested pipeline sections from a YAML configuration.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    config : str
+        Path to the pipeline YAML configuration.
+    jobs : bool, optional
+        Whether to include job metadata. Default is ``False``.
+    pipelines : bool, optional
+        Whether to include pipeline metadata. Default is ``False``.
+    sources : bool, optional
+        Whether to include source metadata. Default is ``False``.
+    summary : bool, optional
+        Whether to print a full summary of the pipeline. Default is ``False``.
+    targets : bool, optional
+        Whether to include target metadata. Default is ``False``.
+    transforms : bool, optional
+        Whether to include transform metadata. Default is ``False``.
+    substitute : bool, optional
+        Whether to perform environment variable substitution. Default is
+        ``True``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
     int
         Zero on success.
     """
-    cfg = load_pipeline_config(args.config, substitute=True)
-    if getattr(args, 'summary', False):
+    cfg = load_pipeline_config(config, substitute=substitute)
+    if summary:
         cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
         return 0
-    cli_io.emit_json(_check_sections(cfg, args), pretty=True)
+    cli_io.emit_json(
+        _check_sections(
+            cfg,
+            jobs=jobs,
+            pipelines=pipelines,
+            sources=sources,
+            targets=targets,
+            transforms=transforms,
+        ),
+        pretty=pretty,
+    )
     return 0
 def extract_handler(
-    args: argparse.Namespace,
+    *,
+    source_type: str,
+    source: str,
+    format_hint: str | None = None,
+    format_explicit: bool = False,
+    target: str | None = None,
+    output: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
     Extract data from a source.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    source_type : str
+        The type of the source (e.g., 'file', 'api', 'database').
+    source : str
+        The source identifier (e.g., path, URL, DSN).
+    format_hint : str | None, optional
+        An optional format hint (e.g., 'json', 'csv'). Default is ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    target : str | None, optional
+        The target destination (e.g., path, database). Default is ``None``.
+    output : str | None, optional
+        Path to write output data. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
     int
         Zero on success.
     """
-    pretty, _ = cli_io.presentation_flags(args)
-    explicit_format = cli_io.explicit_cli_format(args)
+    explicit_format = format_hint if format_explicit else None
-    if args.source == '-':
+    if source == '-':
         text = cli_io.read_stdin_text()
         payload = cli_io.parse_text_payload(
             text,
-            getattr(args, 'format', None),
+            format_hint,
         )
         cli_io.emit_json(payload, pretty=pretty)
         return 0
     result = extract(
-        args.source_type,
-        args.source,
+        source_type,
+        source,
         file_format=explicit_format,
     )
-    output_path = getattr(args, 'target', None)
-    if output_path is None:
-        output_path = getattr(args, 'output', None)
+    output_path = target or output
     cli_io.emit_or_write(
         result,
@@ -217,30 +287,52 @@ def extract_handler(
 def load_handler(
-    args: argparse.Namespace,
+    *,
+    source: str,
+    target_type: str,
+    target: str,
+    source_format: str | None = None,
+    target_format: str | None = None,
+    format_explicit: bool = False,
+    output: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
     Load data into a target.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    source : str
+        The source payload (e.g., path, inline data).
+    target_type : str
+        The type of the target (e.g., 'file', 'database').
+    target : str
+        The target destination (e.g., path, DSN).
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target_format : str | None, optional
+        An optional target format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    output : str | None, optional
+        Path to write output data. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
     int
         Zero on success.
     """
-    pretty, _ = cli_io.presentation_flags(args)
-    explicit_format = cli_io.explicit_cli_format(args)
+    explicit_format = target_format if format_explicit else None
     # Allow piping into load.
-    source_format = getattr(args, 'source_format', None)
     source_value = cast(
         str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
         cli_io.resolve_cli_payload(
-            args.source,
+            source,
             format_hint=source_format,
             format_explicit=source_format is not None,
             hydrate_files=False,
@@ -248,7 +340,7 @@ def load_handler(
     )
     # Allow piping out of load for file targets.
-    if args.target_type == 'file' and args.target == '-':
+    if target_type == 'file' and target == '-':
         payload = cli_io.materialize_file_payload(
             source_value,
             format_hint=source_format,
@@ -259,12 +351,12 @@ def load_handler(
     result = load(
         source_value,
-        args.target_type,
-        args.target,
+        target_type,
+        target,
         file_format=explicit_format,
     )
-    output_path = getattr(args, 'output', None)
+    output_path = output
     cli_io.emit_or_write(
         result,
         output_path,
@@ -276,21 +368,53 @@ def load_handler(
 def render_handler(
-    args: argparse.Namespace,
+    *,
+    config: str | None = None,
+    spec: str | None = None,
+    table: str | None = None,
+    template: TemplateKey | None = None,
+    template_path: str | None = None,
+    output: str | None = None,
+    pretty: bool = True,
+    quiet: bool = False,
 ) -> int:
-    """Render SQL DDL statements from table schema specs."""
-    _, quiet = cli_io.presentation_flags(args)
+    """
+    Render SQL DDL statements from table schema specs.
+    Parameters
+    ----------
+    config : str | None, optional
+        Path to a pipeline YAML configuration. Default is ``None``.
+    spec : str | None, optional
+        Path to a standalone table spec file. Default is ``None``.
+    table : str | None, optional
+        Table name filter. Default is ``None``.
+    template : TemplateKey | None, optional
+        The template key to use for rendering. Default is ``None``.
+    template_path : str | None, optional
+        Path to a custom template file. Default is ``None``.
+    output : str | None, optional
+        Path to write output SQL. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
+    quiet : bool, optional
+        Whether to suppress non-error output. Default is ``False``.
-    template_value: TemplateKey = getattr(args, 'template', 'ddl') or 'ddl'
-    template_path = getattr(args, 'template_path', None)
-    table_filter = getattr(args, 'table', None)
-    spec_path = getattr(args, 'spec', None)
-    config_path = getattr(args, 'config', None)
+    Returns
+    -------
+    int
+        Zero on success.
+    """
+    template_value: TemplateKey = template or 'ddl'
+    template_path_override = template_path
+    table_filter = table
+    spec_path = spec
+    config_path = config
     # If the provided template points to a file, treat it as a path override.
-    file_override = template_path
+    file_override = template_path_override
     template_key: TemplateKey | None = template_value
-    if template_path is None:
+    if template_path_override is None:
         candidate_path = Path(template_value)
         if candidate_path.exists():
             file_override = str(candidate_path)
@@ -323,117 +447,197 @@ def render_handler(
     sql_text = (
         '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
     )
+    rendered_output = sql_text if pretty else sql_text.rstrip('\n')
-    output_path = getattr(args, 'output', None)
+    output_path = output
     if output_path and output_path != '-':
-        Path(output_path).write_text(sql_text, encoding='utf-8')
+        Path(output_path).write_text(rendered_output, encoding='utf-8')
         if not quiet:
             print(f'Rendered {len(specs)} schema(s) to {output_path}')
         return 0
-    print(sql_text)
+    print(rendered_output)
     return 0
 def run_handler(
-    args: argparse.Namespace,
+    *,
+    config: str,
+    job: str | None = None,
+    pipeline: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
     Execute an ETL job end-to-end from a pipeline YAML configuration.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    config : str
+        Path to the pipeline YAML configuration.
+    job : str | None, optional
+        Name of the job to run. If not provided, runs the entire pipeline.
+        Default is ``None``.
+    pipeline : str | None, optional
+        Alias for ``job``. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
     int
         Zero on success.
     """
-    cfg = load_pipeline_config(args.config, substitute=True)
+    cfg = load_pipeline_config(config, substitute=True)
-    job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
+    job_name = job or pipeline
     if job_name:
-        result = run(job=job_name, config_path=args.config)
-        cli_io.emit_json({'status': 'ok', 'result': result}, pretty=True)
+        result = run(job=job_name, config_path=config)
+        cli_io.emit_json({'status': 'ok', 'result': result}, pretty=pretty)
         return 0
-    cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
+    cli_io.emit_json(_pipeline_summary(cfg), pretty=pretty)
     return 0
+TransformOperations = Mapping[
+    Literal['filter', 'map', 'select', 'sort', 'aggregate'],
+    Any,
+]
 def transform_handler(
-    args: argparse.Namespace,
+    *,
+    source: str,
+    operations: JSONData | str,
+    target: str | None = None,
+    source_format: str | None = None,
+    target_format: str | None = None,
+    pretty: bool = True,
+    format_explicit: bool = False,
 ) -> int:
     """
     Transform data from a source.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    source : str
+        The source payload (e.g., path, inline data).
+    operations : JSONData | str
+        The transformation operations (inline JSON or path).
+    target : str | None, optional
+        The target destination (e.g., path). Default is ``None``.
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target_format : str | None, optional
+        An optional target format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
     Returns
     -------
     int
         Zero on success.
+    Raises
+    ------
+    ValueError
+        If the operations payload is not a mapping.
     """
-    pretty, _ = cli_io.presentation_flags(args)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    format_explicit: bool = format_hint is not None
+    format_hint: str | None = source_format
+    format_explicit = format_hint is not None or format_explicit
     payload = cast(
         JSONData | str,
         cli_io.resolve_cli_payload(
-            args.source,
+            source,
             format_hint=format_hint,
             format_explicit=format_explicit,
         ),
     )
-    data = transform(payload, args.operations)
-    cli_io.emit_or_write(
-        data,
-        getattr(args, 'target', None),
-        pretty=pretty,
-        success_message='Data transformed and saved to',
+    operations_payload = cli_io.resolve_cli_payload(
+        operations,
+        format_hint=None,
+        format_explicit=format_explicit,
     )
+    if not isinstance(operations_payload, dict):
+        raise ValueError('operations must resolve to a mapping of transforms')
+    data = transform(payload, cast(TransformOperations, operations_payload))
+    if target and target != '-':
+        File.write_file(target, data, file_format=target_format)
+        print(f'Data transformed and saved to {target}')
+        return 0
+    cli_io.emit_json(data, pretty=pretty)
     return 0
 def validate_handler(
-    args: argparse.Namespace,
+    *,
+    source: str,
+    rules: JSONData | str,
+    source_format: str | None = None,
+    target: str | None = None,
+    format_explicit: bool = False,
+    pretty: bool = True,
 ) -> int:
     """
     Validate data from a source.
     Parameters
     ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    source : str
+        The source payload (e.g., path, inline data).
+    rules : JSONData | str
+        The validation rules (inline JSON or path).
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target : str | None, optional
+        The target destination (e.g., path). Default is ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
     Returns
     -------
     int
         Zero on success.
+    Raises
+    ------
+    ValueError
+        If the rules payload is not a mapping.
     """
-    pretty, _ = cli_io.presentation_flags(args)
-    format_explicit: bool = getattr(args, '_format_explicit', False)
-    format_hint: str | None = getattr(args, 'source_format', None)
+    format_hint: str | None = source_format
     payload = cast(
         JSONData | str,
         cli_io.resolve_cli_payload(
-            args.source,
+            source,
             format_hint=format_hint,
             format_explicit=format_explicit,
         ),
     )
-    result = validate(payload, args.rules)
-    target_path = getattr(args, 'target', None)
+    rules_payload = cli_io.resolve_cli_payload(
+        rules,
+        format_hint=None,
+        format_explicit=format_explicit,
+    )
+    if not isinstance(rules_payload, dict):
+        raise ValueError('rules must resolve to a mapping of field rules')
+    field_rules = cast(Mapping[str, FieldRules], rules_payload)
+    result = validate(payload, field_rules)
+    target_path = target
     if target_path:
         validated_data = result.get('data')
         if validated_data is not None:

etlplus 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

etlplus 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl