etlplus 0.4.7__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
etlplus/cli/handlers.py CHANGED
@@ -18,6 +18,8 @@ from typing import cast
 
 from ..config import PipelineConfig
 from ..config import load_pipeline_config
+from ..database import load_table_spec
+from ..database import render_tables
 from ..enums import FileFormat
 from ..extract import extract
 from ..file import File
@@ -34,19 +36,51 @@ from ..validate import validate
 
 __all__ = [
     # Functions
-    'cmd_extract',
-    'cmd_list',
-    'cmd_load',
-    'cmd_pipeline',
-    'cmd_run',
-    'cmd_transform',
-    'cmd_validate',
+    'extract_handler',
+    'check_handler',
+    'load_handler',
+    'pipeline_handler',
+    'render_handler',
+    'run_handler',
+    'transform_handler',
+    'validate_handler',
 ]
 
 
 # SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
+def _collect_table_specs(
+    config_path: str | None,
+    spec_path: str | None,
+) -> list[dict[str, Any]]:
+    """
+    Load table schemas from a pipeline config and/or standalone spec.
+
+    Parameters
+    ----------
+    config_path : str | None
+        Path to a pipeline YAML config file.
+    spec_path : str | None
+        Path to a standalone table spec file.
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        Collected table specification mappings.
+    """
+    specs: list[dict[str, Any]] = []
+
+    if spec_path:
+        specs.append(load_table_spec(Path(spec_path)))
+
+    if config_path:
+        cfg = load_pipeline_config(config_path, substitute=True)
+        specs.extend(getattr(cfg, 'table_schemas', []))
+
+    return specs
+
+
 def _emit_json(
     data: Any,
     *,
@@ -75,6 +109,23 @@ def _emit_json(
     print(dumped)
 
 
+def _explicit_cli_format(
+    args: argparse.Namespace,
+) -> str | None:
+    """Return the explicit CLI format hint when provided."""
+
+    if not getattr(args, '_format_explicit', False):
+        return None
+    for attr in ('format', 'target_format', 'source_format'):
+        value = getattr(args, attr, None)
+        if value is None:
+            continue
+        normalized = value.strip().lower()
+        if normalized:
+            return normalized
+    return None
+
+
 def _infer_payload_format(
     text: str,
 ) -> str:
@@ -97,12 +148,12 @@ def _infer_payload_format(
     return 'csv'
 
 
-def _list_sections(
+def _check_sections(
     cfg: PipelineConfig,
     args: argparse.Namespace,
 ) -> dict[str, Any]:
     """
-    Build sectioned metadata output for the list command.
+    Build sectioned metadata output for the check command.
 
     Parameters
     ----------
@@ -114,9 +165,11 @@
     Returns
     -------
     dict[str, Any]
-        Metadata output for the list command.
+        Metadata output for the check command.
     """
     sections: dict[str, Any] = {}
+    if getattr(args, 'jobs', False):
+        sections['jobs'] = _pipeline_summary(cfg)['jobs']
     if getattr(args, 'pipelines', False):
         sections['pipelines'] = [cfg.name]
     if getattr(args, 'sources', False):
@@ -132,23 +185,6 @@
     return sections
 
 
-def _explicit_cli_format(
-    args: argparse.Namespace,
-) -> str | None:
-    """Return the explicit CLI format hint when provided."""
-
-    if not getattr(args, '_format_explicit', False):
-        return None
-    for attr in ('format', 'target_format', 'source_format'):
-        value = getattr(args, attr, None)
-        if value is None:
-            continue
-        normalized = value.strip().lower()
-        if normalized:
-            return normalized
-    return None
-
-
 def _materialize_file_payload(
     source: object,
     *,
@@ -222,7 +258,6 @@ def _parse_text_payload(
     JSONData | str
         The parsed payload as JSON data or raw text.
     """
-
     effective = (fmt or '').strip().lower() or _infer_payload_format(text)
     if effective == 'json':
         return cast(JSONData, json_type(text))
@@ -263,7 +298,8 @@ def _pipeline_summary(
 def _presentation_flags(
     args: argparse.Namespace,
 ) -> tuple[bool, bool]:
-    """Return presentation toggles from the parsed namespace.
+    """
+    Return presentation toggles from the parsed namespace.
 
     Parameters
     ----------
@@ -340,7 +376,6 @@ def _resolve_cli_payload(
         Parsed payload or the original source value when hydration is
         disabled.
     """
-
     if isinstance(source, (os.PathLike, str)) and str(source) == '-':
         text = _read_stdin_text()
         return _parse_text_payload(text, format_hint)
@@ -388,11 +423,11 @@ def _write_json_output(
 # SECTION: FUNCTIONS ======================================================== #
 
 
-def cmd_extract(
+def check_handler(
     args: argparse.Namespace,
 ) -> int:
     """
-    Extract data from a source.
+    Print requested pipeline sections from a YAML configuration.
 
     Parameters
     ----------
@@ -404,40 +439,20 @@ def cmd_extract(
     int
         Zero on success.
     """
-    pretty, _ = _presentation_flags(args)
-    explicit_format = _explicit_cli_format(args)
-
-    if args.source == '-':
-        text = _read_stdin_text()
-        payload = _parse_text_payload(text, getattr(args, 'format', None))
-        _emit_json(payload, pretty=pretty)
-
+    cfg = load_pipeline_config(args.config, substitute=True)
+    if getattr(args, 'summary', False):
+        print_json(_pipeline_summary(cfg))
         return 0
 
-    result = extract(
-        args.source_type,
-        args.source,
-        file_format=explicit_format,
-    )
-    output_path = getattr(args, 'target', None)
-    if output_path is None:
-        output_path = getattr(args, 'output', None)
-
-    if not _write_json_output(
-        result,
-        output_path,
-        success_message='Data extracted and saved to',
-    ):
-        _emit_json(result, pretty=pretty)
-
+    print_json(_check_sections(cfg, args))
     return 0
 
 
-def cmd_validate(
+def extract_handler(
     args: argparse.Namespace,
 ) -> int:
     """
-    Validate data from a source.
+    Extract data from a source.
 
     Parameters
     ----------
@@ -449,81 +464,36 @@ def cmd_validate(
     int
         Zero on success.
     """
-    pretty, _quiet = _presentation_flags(args)
-    format_explicit: bool = getattr(args, '_format_explicit', False)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
-        ),
-    )
-    result = validate(payload, args.rules)
-
-    target_path = getattr(args, 'target', None)
-    if target_path:
-        validated_data = result.get('data')
-        if validated_data is not None:
-            _write_json_output(
-                validated_data,
-                target_path,
-                success_message='Validation result saved to',
-            )
-        else:
-            print(
-                f'Validation failed, no data to save for {target_path}',
-                file=sys.stderr,
-            )
-    else:
-        _emit_json(result, pretty=pretty)
-
-    return 0
-
-
-def cmd_transform(
-    args: argparse.Namespace,
-) -> int:
-    """
-    Transform data from a source.
+    pretty, _ = _presentation_flags(args)
+    explicit_format = _explicit_cli_format(args)
 
-    Parameters
-    ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    if args.source == '-':
+        text = _read_stdin_text()
+        payload = _parse_text_payload(text, getattr(args, 'format', None))
+        _emit_json(payload, pretty=pretty)
 
-    Returns
-    -------
-    int
-        Zero on success.
-    """
-    pretty, _quiet = _presentation_flags(args)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    format_explicit: bool = format_hint is not None
+        return 0
 
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
-        ),
+    result = extract(
+        args.source_type,
+        args.source,
+        file_format=explicit_format,
     )
-
-    data = transform(payload, args.operations)
+    output_path = getattr(args, 'target', None)
+    if output_path is None:
+        output_path = getattr(args, 'output', None)
 
     if not _write_json_output(
-        data,
-        getattr(args, 'target', None),
-        success_message='Data transformed and saved to',
+        result,
+        output_path,
+        success_message='Data extracted and saved to',
     ):
-        _emit_json(data, pretty=pretty)
+        _emit_json(result, pretty=pretty)
 
     return 0
 
 
-def cmd_load(
+def load_handler(
     args: argparse.Namespace,
 ) -> int:
     """
@@ -582,7 +552,7 @@ def cmd_load(
     return 0
 
 
-def cmd_pipeline(
+def pipeline_handler(
     args: argparse.Namespace,
 ) -> int:
     """
@@ -598,6 +568,12 @@ def cmd_pipeline(
     int
         Zero on success.
     """
+    print(
+        'DEPRECATED: use "etlplus check --summary|--jobs" or '
+        '"etlplus run --job/--pipeline" instead of "etlplus pipeline".',
+        file=sys.stderr,
+    )
+
     cfg = load_pipeline_config(args.config, substitute=True)
 
     list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
@@ -620,9 +596,71 @@
     return 0
 
 
-def cmd_list(args: argparse.Namespace) -> int:
+def render_handler(
+    args: argparse.Namespace,
+) -> int:
+    """Render SQL DDL statements from table schema specs."""
+    _, quiet = _presentation_flags(args)
+
+    template_value = getattr(args, 'template', 'ddl') or 'ddl'
+    template_path = getattr(args, 'template_path', None)
+    table_filter = getattr(args, 'table', None)
+    spec_path = getattr(args, 'spec', None)
+    config_path = getattr(args, 'config', None)
+
+    # If the provided template points to a file, treat it as a path override.
+    file_override = template_path
+    template_key = template_value
+    if template_path is None:
+        candidate_path = Path(template_value)
+        if candidate_path.exists():
+            file_override = str(candidate_path)
+            template_key = None
+
+    specs = _collect_table_specs(config_path, spec_path)
+    if table_filter:
+        specs = [
+            spec
+            for spec in specs
+            if str(spec.get('table')) == table_filter
+            or str(spec.get('name', '')) == table_filter
+        ]
+
+    if not specs:
+        target_desc = table_filter or 'table_schemas'
+        print(
+            'No table schemas found for '
+            f'{target_desc}. Provide --spec or a pipeline --config with '
+            'table_schemas.',
+            file=sys.stderr,
+        )
+        return 1
+
+    rendered_chunks = render_tables(
+        specs,
+        template=template_key,
+        template_path=file_override,
+    )
+    sql_text = (
+        '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
+    )
+
+    output_path = getattr(args, 'output', None)
+    if output_path and output_path != '-':
+        Path(output_path).write_text(sql_text, encoding='utf-8')
+        if not quiet:
+            print(f'Rendered {len(specs)} schema(s) to {output_path}')
+        return 0
+
+    print(sql_text)
+    return 0
+
+
+def run_handler(
+    args: argparse.Namespace,
+) -> int:
     """
-    Print requested pipeline sections from a YAML configuration.
+    Execute an ETL job end-to-end from a pipeline YAML configuration.
 
     Parameters
     ----------
@@ -635,13 +673,22 @@ def cmd_list(args: argparse.Namespace) -> int:
         Zero on success.
     """
     cfg = load_pipeline_config(args.config, substitute=True)
-    print_json(_list_sections(cfg, args))
+
+    job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
+    if job_name:
+        result = run(job=job_name, config_path=args.config)
+        print_json({'status': 'ok', 'result': result})
+        return 0
+
+    print_json(_pipeline_summary(cfg))
     return 0
 
 
-def cmd_run(args: argparse.Namespace) -> int:
+def transform_handler(
+    args: argparse.Namespace,
+) -> int:
     """
-    Execute an ETL job end-to-end from a pipeline YAML configuration.
+    Transform data from a source.
 
     Parameters
     ----------
@@ -653,13 +700,75 @@ def cmd_run(args: argparse.Namespace) -> int:
     int
         Zero on success.
     """
-    cfg = load_pipeline_config(args.config, substitute=True)
+    pretty, _ = _presentation_flags(args)
+    format_hint: str | None = getattr(args, 'source_format', None)
+    format_explicit: bool = format_hint is not None
 
-    job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
-    if job_name:
-        result = run(job=job_name, config_path=args.config)
-        print_json({'status': 'ok', 'result': result})
-        return 0
+    payload = cast(
+        JSONData | str,
+        _resolve_cli_payload(
+            args.source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
+
+    data = transform(payload, args.operations)
+
+    if not _write_json_output(
+        data,
+        getattr(args, 'target', None),
+        success_message='Data transformed and saved to',
+    ):
+        _emit_json(data, pretty=pretty)
+
+    return 0
+
+
+def validate_handler(
+    args: argparse.Namespace,
+) -> int:
+    """
+    Validate data from a source.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Parsed command-line arguments.
+
+    Returns
+    -------
+    int
+        Zero on success.
+    """
+    pretty, _ = _presentation_flags(args)
+    format_explicit: bool = getattr(args, '_format_explicit', False)
+    format_hint: str | None = getattr(args, 'source_format', None)
+    payload = cast(
+        JSONData | str,
+        _resolve_cli_payload(
+            args.source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
+    result = validate(payload, args.rules)
+
+    target_path = getattr(args, 'target', None)
+    if target_path:
+        validated_data = result.get('data')
+        if validated_data is not None:
+            _write_json_output(
+                validated_data,
+                target_path,
+                success_message='Validation result saved to',
+            )
+        else:
+            print(
+                f'Validation failed, no data to save for {target_path}',
+                file=sys.stderr,
+            )
+    else:
+        _emit_json(result, pretty=pretty)
 
-    print_json(_pipeline_summary(cfg))
     return 0
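
Note: the renamed handlers keep the old cmd_* contract, taking a parsed argparse.Namespace and returning an integer exit code, so they can be driven outside the CLI. A minimal sketch of calling the new check_handler directly; the pipeline.yml path is hypothetical, and the attribute names mirror the check flags wired up in main.py below:

# Sketch only: invoking the renamed check_handler outside the CLI.
# check_handler reads each section toggle via getattr() with a default,
# so only the attributes we care about need to be set.
import argparse

from etlplus.cli.handlers import check_handler

args = argparse.Namespace(
    config='pipeline.yml',  # hypothetical local pipeline config
    summary=True,           # same toggle as `etlplus check --summary`
)
exit_code = check_handler(args)  # prints the summary JSON and returns 0
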
etlplus/cli/main.py CHANGED
@@ -24,13 +24,14 @@ from ..enums import FileFormat
 from ..utils import json_type
 from .app import PROJECT_URL
 from .app import app
-from .handlers import cmd_extract
-from .handlers import cmd_list
-from .handlers import cmd_load
-from .handlers import cmd_pipeline
-from .handlers import cmd_run
-from .handlers import cmd_transform
-from .handlers import cmd_validate
+from .handlers import check_handler
+from .handlers import extract_handler
+from .handlers import load_handler
+from .handlers import pipeline_handler
+from .handlers import render_handler
+from .handlers import run_handler
+from .handlers import transform_handler
+from .handlers import validate_handler
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -328,7 +329,7 @@ def create_parser() -> argparse.ArgumentParser:
         ),
     )
     _add_format_options(extract_parser, context='source')
-    extract_parser.set_defaults(func=cmd_extract)
+    extract_parser.set_defaults(func=extract_handler)
 
     validate_parser = subparsers.add_parser(
         'validate',
@@ -345,7 +346,7 @@
         default={},
         help='Validation rules as JSON string',
     )
-    validate_parser.set_defaults(func=cmd_validate)
+    validate_parser.set_defaults(func=validate_handler)
 
     transform_parser = subparsers.add_parser(
         'transform',
@@ -393,7 +394,7 @@
             'File targets infer format from the extension.'
         ),
     )
-    transform_parser.set_defaults(func=cmd_transform)
+    transform_parser.set_defaults(func=transform_handler)
 
     load_parser = subparsers.add_parser(
         'load',
@@ -417,13 +418,14 @@
         ),
     )
     _add_format_options(load_parser, context='target')
-    load_parser.set_defaults(func=cmd_load)
+    load_parser.set_defaults(func=load_handler)
 
     pipe_parser = subparsers.add_parser(
         'pipeline',
         help=(
-            'Inspect or run pipeline YAML (see '
-            f'{PROJECT_URL}/blob/main/docs/pipeline-guide.md)'
+            'DEPRECATED: use "list" (for summary/jobs) or "run" (to execute); '
+            'see '
+            f'{PROJECT_URL}/blob/main/docs/pipeline-guide.md'
         ),
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
@@ -438,35 +440,83 @@
         metavar='JOB',
         help='Run a specific job by name',
     )
-    pipe_parser.set_defaults(func=cmd_pipeline)
+    pipe_parser.set_defaults(func=pipeline_handler)
 
-    list_parser = subparsers.add_parser(
-        'list',
-        help='List ETL pipeline metadata',
+    render_parser = subparsers.add_parser(
+        'render',
+        help='Render SQL DDL from table schema specs',
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
-    _add_config_option(list_parser)
+    render_parser.add_argument(
+        '--config',
+        help='Pipeline YAML containing table_schemas',
+    )
+    render_parser.add_argument(
+        '-o',
+        '--output',
+        help='Write SQL to this path (stdout when omitted)',
+    )
+    render_parser.add_argument(
+        '--spec',
+        help='Standalone table spec file (.yml/.yaml/.json)',
+    )
+    render_parser.add_argument(
+        '--table',
+        help='Render only the table matching this name',
+    )
+    render_parser.add_argument(
+        '--template',
+        default='ddl',
+        help='Template key (ddl/view) or path to a Jinja template file',
+    )
+    render_parser.add_argument(
+        '--template-path',
+        dest='template_path',
+        help=(
+            'Explicit path to a Jinja template file (overrides template key).'
+        ),
+    )
+    render_parser.set_defaults(func=render_handler)
+
+    check_parser = subparsers.add_parser(
+        'check',
+        help='Inspect ETL pipeline metadata',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    _add_config_option(check_parser)
+    _add_boolean_flag(
+        check_parser,
+        name='jobs',
+        help_text='List ETL jobs',
+    )
     _add_boolean_flag(
-        list_parser,
+        check_parser,
         name='pipelines',
         help_text='List ETL pipelines',
     )
     _add_boolean_flag(
-        list_parser,
+        check_parser,
         name='sources',
         help_text='List data sources',
     )
     _add_boolean_flag(
-        list_parser,
+        check_parser,
+        name='summary',
+        help_text=(
+            'Show pipeline summary (name, version, sources, targets, jobs)'
+        ),
+    )
+    _add_boolean_flag(
+        check_parser,
         name='targets',
         help_text='List data targets',
     )
     _add_boolean_flag(
-        list_parser,
+        check_parser,
         name='transforms',
         help_text='List data transforms',
    )
-    list_parser.set_defaults(func=cmd_list)
+    check_parser.set_defaults(func=check_handler)
 
     run_parser = subparsers.add_parser(
         'run',
@@ -487,7 +537,7 @@
         '--pipeline',
         help='Name of the pipeline to run',
     )
-    run_parser.set_defaults(func=cmd_run)
+    run_parser.set_defaults(func=run_handler)
 
     return parser
 
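
Note: because every subcommand binds its handler through set_defaults(func=...), dispatch reduces to args.func(args) once parsing succeeds. A minimal sketch of exercising the new render subcommand through the parser; tables.yml and schema.sql are hypothetical paths:

# Sketch only: parsing and dispatching the new render subcommand.
# Going through the real parser means each handler receives the
# defaults its flags define, rather than a hand-built Namespace.
from etlplus.cli.main import create_parser

parser = create_parser()
# Equivalent to: etlplus render --spec tables.yml -o schema.sql
args = parser.parse_args(['render', '--spec', 'tables.yml', '-o', 'schema.sql'])
raise SystemExit(args.func(args))  # render_handler: 0 on success, 1 if no specs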