etlplus 0.14.3__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. etlplus/README.md +4 -4
  2. etlplus/api/README.md +33 -2
  3. etlplus/api/config.py +3 -8
  4. etlplus/api/types.py +89 -0
  5. etlplus/api/utils.py +5 -1
  6. etlplus/cli/README.md +2 -2
  7. etlplus/cli/commands.py +75 -42
  8. etlplus/cli/handlers.py +32 -14
  9. etlplus/cli/main.py +1 -1
  10. etlplus/cli/state.py +4 -7
  11. etlplus/database/README.md +2 -2
  12. etlplus/database/engine.py +18 -2
  13. etlplus/database/orm.py +2 -0
  14. etlplus/file/README.md +2 -2
  15. etlplus/file/_io.py +39 -0
  16. etlplus/file/json.py +2 -14
  17. etlplus/file/yaml.py +2 -14
  18. etlplus/ops/run.py +14 -9
  19. etlplus/ops/utils.py +4 -33
  20. etlplus/ops/validate.py +3 -3
  21. etlplus/templates/README.md +2 -2
  22. etlplus/types.py +3 -2
  23. etlplus/utils.py +136 -2
  24. etlplus/{config → workflow}/README.md +6 -6
  25. etlplus/{config → workflow}/__init__.py +10 -23
  26. etlplus/{config → workflow}/connector.py +58 -44
  27. etlplus/{dag.py → workflow/dag.py} +6 -4
  28. etlplus/{config → workflow}/jobs.py +101 -38
  29. etlplus/{config → workflow}/pipeline.py +57 -49
  30. etlplus/{config → workflow}/profile.py +8 -5
  31. etlplus/workflow/types.py +115 -0
  32. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/METADATA +4 -4
  33. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/RECORD +37 -38
  34. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/WHEEL +1 -1
  35. etlplus/config/types.py +0 -204
  36. etlplus/config/utils.py +0 -120
  37. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/entry_points.txt +0 -0
  38. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/licenses/LICENSE +0 -0
  39. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/top_level.txt +0 -0
etlplus/file/yaml.py CHANGED
@@ -18,13 +18,11 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
-from typing import cast
 
 from ..types import JSONData
-from ..types import JSONDict
-from ..types import JSONList
 from ..utils import count_records
 from ._imports import get_yaml
+from ._io import coerce_record_payload
 
 
 # SECTION: EXPORTS ========================================================== #
@@ -64,17 +62,7 @@ def read(
     with path.open('r', encoding='utf-8') as handle:
         loaded = get_yaml().safe_load(handle)
 
-    if isinstance(loaded, dict):
-        return cast(JSONDict, loaded)
-    if isinstance(loaded, list):
-        if all(isinstance(item, dict) for item in loaded):
-            return cast(JSONList, loaded)
-        raise TypeError(
-            'YAML array must contain only objects (dicts) when loading',
-        )
-    raise TypeError(
-        'YAML root must be an object or an array of objects when loading',
-    )
+    return coerce_record_payload(loaded, format_name='YAML')
 
 
 def write(
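
The replacement helper lives in the new `etlplus/file/_io.py` (+39 lines, not expanded in this diff). Judging from the logic it replaces here and the parallel change in `etlplus/file/json.py`, a plausible sketch follows; everything beyond the `coerce_record_payload(loaded, format_name=...)` call shape is an assumption.

```python
# Hypothetical reconstruction of coerce_record_payload from etlplus/file/_io.py;
# the real body is not shown in this diff. It mirrors the dict / list-of-dicts
# validation deleted from yaml.py above, parameterized by format name so that
# json.py can share the same error wording.
from typing import cast

from ..types import JSONData
from ..types import JSONDict
from ..types import JSONList


def coerce_record_payload(loaded: object, *, format_name: str) -> JSONData:
    if isinstance(loaded, dict):
        return cast(JSONDict, loaded)
    if isinstance(loaded, list):
        if all(isinstance(item, dict) for item in loaded):
            return cast(JSONList, loaded)
        raise TypeError(
            f'{format_name} array must contain only objects (dicts) when loading',
        )
    raise TypeError(
        f'{format_name} root must be an object or an array of objects when loading',
    )
```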
etlplus/ops/run.py CHANGED
@@ -20,7 +20,6 @@ from ..api import RequestOptions
 from ..api import compose_api_request_env
 from ..api import compose_api_target_env
 from ..api import paginate_with_client
-from ..config import load_pipeline_config
 from ..enums import DataConnectorType
 from ..file import FileFormat
 from ..types import JSONData
@@ -29,6 +28,7 @@ from ..types import PipelineConfig
 from ..types import StrPath
 from ..types import Timeout
 from ..utils import print_json
+from ..workflow import load_pipeline_config
 from .extract import extract
 from .load import load
 from .transform import transform
@@ -162,9 +162,12 @@ def run(
         # can monkeypatch this class on etlplus.ops.run.
         ClientClass = EndpointClient  # noqa: N806
         client = ClientClass(
-            base_url=cast(str, env['base_url']),
+            base_url=cast(str, env.get('base_url')),
             base_path=cast(str | None, env.get('base_path')),
-            endpoints=cast(dict[str, str], env['endpoints_map']),
+            endpoints=cast(
+                dict[str, str],
+                env.get('endpoints_map', {}),
+            ),
             retry=env.get('retry'),
             retry_network_errors=bool(
                 env.get('retry_network_errors', False),
@@ -173,7 +176,7 @@
         )
         data = paginate_with_client(
             client,
-            cast(str, env['endpoint_key']),
+            cast(str, env.get('endpoint_key')),
             env.get('params'),
             env.get('headers'),
             env.get('timeout'),
@@ -276,12 +279,14 @@ def run(
         if not url_t:
             raise ValueError('API target missing "url"')
         kwargs_t: dict[str, Any] = {}
-        if env_t.get('headers'):
-            kwargs_t['headers'] = cast(dict[str, str], env_t['headers'])
+        headers = env_t.get('headers')
+        if headers:
+            kwargs_t['headers'] = cast(dict[str, str], headers)
         if env_t.get('timeout') is not None:
-            kwargs_t['timeout'] = env_t['timeout']
-        if env_t.get('session') is not None:
-            kwargs_t['session'] = env_t['session']
+            kwargs_t['timeout'] = env_t.get('timeout')
+        session = env_t.get('session')
+        if session is not None:
+            kwargs_t['session'] = session
         result = load(
             data,
             'api',
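
The theme across this file's hunks: subscript access such as `env['base_url']` becomes `dict.get`, so a partially populated request env yields `None` (or an explicit default) instead of aborting with `KeyError`. A minimal illustration of the stdlib behavior being relied on:

```python
# Plain dict semantics behind the change: .get() never raises on a missing
# key; it returns None or the supplied default.
env: dict[str, object] = {}

assert env.get('base_url') is None          # was: env['base_url'] -> KeyError
assert env.get('endpoints_map', {}) == {}   # default keeps the client usable
```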
etlplus/ops/utils.py CHANGED
@@ -7,13 +7,11 @@ The helpers defined here embrace a "high cohesion, low coupling" design by
 isolating normalization, configuration, and logging responsibilities. The
 resulting surface keeps ``maybe_validate`` focused on orchestration while
 offloading ancillary concerns to composable helpers.
-
 """
 
 from __future__ import annotations
 
 from collections.abc import Callable
-from collections.abc import Mapping
 from dataclasses import dataclass
 from types import MappingProxyType
 from typing import Any
@@ -23,7 +21,7 @@ from typing import TypedDict
 from typing import cast
 
 from ..types import StrAnyMap
-from ..utils import normalized_str
+from ..utils import normalize_choice
 
 # SECTION: TYPED DICTIONARIES =============================================== #
 
@@ -320,7 +318,7 @@ def _normalize_phase(
     """
     return cast(
         ValidationPhase,
-        _normalize_choice(
+        normalize_choice(
            value,
            mapping=_PHASE_CHOICES,
            default='before_transform',
@@ -346,7 +344,7 @@ def _normalize_severity(
     """
     return cast(
         ValidationSeverity,
-        _normalize_choice(
+        normalize_choice(
            value,
            mapping=_SEVERITY_CHOICES,
            default='error',
@@ -372,7 +370,7 @@ def _normalize_window(
     """
     return cast(
         ValidationWindow,
-        _normalize_choice(
+        normalize_choice(
            value,
            mapping=_WINDOW_CHOICES,
            default='both',
@@ -380,33 +378,6 @@
     )
 
 
-def _normalize_choice(
-    value: str | None,
-    *,
-    mapping: Mapping[str, str],
-    default: str,
-) -> str:
-    """
-    Normalize a text value against a mapping with a default fallback.
-
-    Parameters
-    ----------
-    value : str | None
-        Input text to normalize.
-    mapping : Mapping[str, str]
-        Mapping of accepted values to normalized outputs.
-    default : str
-        Default to return when input is missing or unrecognized.
-
-    Returns
-    -------
-    str
-        Normalized value.
-    """
-    normalized = normalized_str(value)
-    return mapping.get(normalized, default)
-
-
 def _rule_name(
     rules: Ruleset,
 ) -> str | None:
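
`_normalize_choice` is not deleted outright; it is promoted to `normalize_choice` in `etlplus.utils` (see the `etlplus/utils.py` diff below). A usage sketch against the promoted signature, where the `_PHASE_CHOICES` contents are illustrative stand-ins rather than the module's actual mapping:

```python
# Illustrative only: this _PHASE_CHOICES stands in for the real mapping in
# etlplus/ops/utils.py, which this diff does not show.
from etlplus.utils import normalize_choice

_PHASE_CHOICES = {
    'before': 'before_transform',
    'before_transform': 'before_transform',
    'after': 'after_transform',
}

# Input is stripped and lowercased by the default normalize_str hook.
assert normalize_choice(
    ' BEFORE ', mapping=_PHASE_CHOICES, default='before_transform',
) == 'before_transform'

# Missing or unrecognized input falls back to the default.
assert normalize_choice(
    None, mapping=_PHASE_CHOICES, default='before_transform',
) == 'before_transform'
```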
etlplus/ops/validate.py CHANGED
@@ -11,8 +11,8 @@ Highlights
 ----------
 - Centralized type map and helpers for clarity and reuse.
 - Consistent error wording; field and item paths like ``[2].email``.
-- Small, focused public API with ``load_data``, ``validate_field``,
-  ``validate``.
+- Small, focused public API with :func:`load_data`, :func:`validate_field`,
+  :func:`validate`.
 
 Examples
 --------
@@ -66,7 +66,7 @@ TYPE_MAP: Final[dict[str, type | tuple[type, ...]]] = {
 }
 
 
-# SECTION: CLASSES ========================================================== #
+# SECTION: TYPED DICTS ====================================================== #
 
 
 class FieldRules(TypedDict, total=False):
etlplus/templates/README.md CHANGED
@@ -1,4 +1,4 @@
-# etlplus.templates subpackage
+# `etlplus.templates` Subpackage
 
 Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpers.
 
@@ -8,7 +8,7 @@ Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpe
 
 Back to project overview: see the top-level [README](../../README.md).
 
-- [etlplus.templates subpackage](#etlpustemplates-subpackage)
+- [`etlplus.templates` Subpackage](#etlplus-templates-subpackage)
   - [Available Templates](#available-templates)
   - [Rendering Templates](#rendering-templates)
   - [Example: Rendering a DDL Template](#example-rendering-a-ddl-template)
etlplus/types.py CHANGED
@@ -11,8 +11,9 @@ Notes
 
 See Also
 --------
-- :mod:`etlplus.api.types` for HTTP-specific aliases
-- :mod:`etlplus.config.types` for TypedDict surfaces
+- :mod:`etlplus.api.types` for HTTP-specific aliases and data classes
+- :mod:`etlplus.workflow.types` for workflow-specific aliases and TypedDict
+  surfaces
 
 Examples
 --------
etlplus/utils.py CHANGED
@@ -8,6 +8,7 @@ from __future__ import annotations
 
 import json
 from collections.abc import Callable
+from collections.abc import Iterable
 from collections.abc import Mapping
 from typing import Any
 from typing import TypeVar
@@ -25,6 +26,7 @@ __all__ = [
     # Mapping utilities
     'cast_str_dict',
     'coerce_dict',
+    'deep_substitute',
     'maybe_mapping',
     # Float coercion
     'to_float',
@@ -39,7 +41,8 @@ __all__ = [
     # Generic number coercion
     'to_number',
     # Text processing
-    'normalized_str',
+    'normalize_choice',
+    'normalize_str',
 ]
 
 
@@ -56,6 +59,52 @@ Num = TypeVar('Num', int, float)
 # -- Data Utilities -- #
 
 
+def deep_substitute(
+    value: Any,
+    vars_map: StrAnyMap | None,
+    env_map: Mapping[str, str] | None,
+) -> Any:
+    """
+    Recursively substitute ``${VAR}`` tokens in nested structures.
+
+    Only strings are substituted; other types are returned as-is.
+
+    Parameters
+    ----------
+    value : Any
+        The value to perform substitutions on.
+    vars_map : StrAnyMap | None
+        Mapping of variable names to replacement values (lower precedence).
+    env_map : Mapping[str, str] | None
+        Mapping of environment variables overriding ``vars_map`` values
+        (higher precedence).
+
+    Returns
+    -------
+    Any
+        New structure with substitutions applied where tokens were found.
+    """
+    substitutions = _prepare_substitutions(vars_map, env_map)
+
+    def _apply(node: Any) -> Any:
+        match node:
+            case str():
+                return _replace_tokens(node, substitutions)
+            case Mapping():
+                return {k: _apply(v) for k, v in node.items()}
+            case list() | tuple() as seq:
+                apply = [_apply(item) for item in seq]
+                return apply if isinstance(seq, list) else tuple(apply)
+            case set():
+                return {_apply(item) for item in node}
+            case frozenset():
+                return frozenset(_apply(item) for item in node)
+            case _:
+                return node
+
+    return _apply(value)
+
+
 def cast_str_dict(
     mapping: StrAnyMap | None,
 ) -> dict[str, str]:
@@ -372,7 +421,7 @@ def to_number(
 # -- Text Processing -- #
 
 
-def normalized_str(
+def normalize_str(
     value: str | None,
 ) -> str:
     """
@@ -392,6 +441,36 @@ def normalized_str(
     return (value or '').strip().lower()
 
 
+def normalize_choice(
+    value: str | None,
+    *,
+    mapping: Mapping[str, str],
+    default: str,
+    normalize: Callable[[str | None], str] = normalize_str,
+) -> str:
+    """
+    Normalize a string choice using a mapping and fallback.
+
+    Parameters
+    ----------
+    value : str | None
+        Input value to normalize.
+    mapping : Mapping[str, str]
+        Mapping of acceptable normalized inputs to output values.
+    default : str
+        Default return value when input is missing or unrecognized.
+    normalize : Callable[[str | None], str], optional
+        Normalization function applied to *value*. Defaults to
+        :func:`normalize_str`.
+
+    Returns
+    -------
+    str
+        Normalized mapped value or ``default``.
+    """
+    return mapping.get(normalize(value), default)
+
+
 # SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
@@ -425,6 +504,61 @@ def _clamp(
     return value
 
 
+def _prepare_substitutions(
+    vars_map: StrAnyMap | None,
+    env_map: Mapping[str, Any] | None,
+) -> tuple[tuple[str, Any], ...]:
+    """
+    Merge variable and environment maps into an ordered substitutions list.
+
+    Parameters
+    ----------
+    vars_map : StrAnyMap | None
+        Mapping of variable names to replacement values (lower precedence).
+    env_map : Mapping[str, Any] | None
+        Environment-backed values that override entries from ``vars_map``.
+
+    Returns
+    -------
+    tuple[tuple[str, Any], ...]
+        Immutable sequence of ``(name, value)`` pairs suitable for token
+        replacement.
+    """
+    if not vars_map and not env_map:
+        return ()
+    merged: dict[str, Any] = {**(vars_map or {}), **(env_map or {})}
+    return tuple(merged.items())
+
+
+def _replace_tokens(
+    text: str,
+    substitutions: Iterable[tuple[str, Any]],
+) -> str:
+    """
+    Replace ``${VAR}`` tokens in ``text`` using ``substitutions``.
+
+    Parameters
+    ----------
+    text : str
+        Input string that may contain ``${VAR}`` tokens.
+    substitutions : Iterable[tuple[str, Any]]
+        Sequence of ``(name, value)`` pairs used for token replacement.
+
+    Returns
+    -------
+    str
+        Updated text with replacements applied.
+    """
+    if not substitutions:
+        return text
+    out = text
+    for name, replacement in substitutions:
+        token = f'${{{name}}}'
+        if token in out:
+            out = out.replace(token, str(replacement))
+    return out
+
+
 def _coerce_float(
     value: object,
 ) -> float | None:
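
A usage sketch for the new `deep_substitute`, following the implementation shown above: `env_map` entries override `vars_map` entries, substitution recurses through containers, and non-string leaves pass through unchanged.

```python
from etlplus.utils import deep_substitute

config = {
    'url': '${HOST}/v1',
    'paths': ['${ROOT}/in', '${ROOT}/out'],
    'retries': 3,  # non-string leaves are returned as-is
}
result = deep_substitute(
    config,
    vars_map={'HOST': 'http://localhost', 'ROOT': '/tmp'},
    env_map={'HOST': 'https://api.example.com'},  # wins over vars_map
)
assert result == {
    'url': 'https://api.example.com/v1',
    'paths': ['/tmp/in', '/tmp/out'],
    'retries': 3,
}
```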
etlplus/{config → workflow}/README.md RENAMED
@@ -1,7 +1,7 @@
-# etlplus.config subpackage
+# `etlplus.workflow` Subpackage
 
-Documentation for the `etlplus.config` subpackage: configuration helpers for connectors, pipelines,
-jobs, and profiles.
+Documentation for the `etlplus.workflow` subpackage: configuration helpers for connectors,
+pipelines, jobs, and profiles.
 
 - Provides classes and utilities for managing ETL pipeline configuration
 - Supports YAML/JSON config loading and validation
@@ -10,7 +10,7 @@ jobs, and profiles.
 
 Back to project overview: see the top-level [README](../../README.md).
 
-- [etlplus.config subpackage](#etlplusconfig-subpackage)
+- [`etlplus.workflow` Subpackage](#etlplusworkflow-subpackage)
   - [Supported Configuration Types](#supported-configuration-types)
   - [Loading and Validating Configs](#loading-and-validating-configs)
   - [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
@@ -28,7 +28,7 @@ Back to project overview: see the top-level [README](../../README.md).
 Use the provided classes to load and validate configuration files:
 
 ```python
-from etlplus.config import PipelineConfig
+from etlplus.workflow import PipelineConfig
 
 cfg = PipelineConfig.from_yaml("pipeline.yml")
 ```
@@ -39,7 +39,7 @@ cfg = PipelineConfig.from_yaml("pipeline.yml")
 ## Example: Loading a Pipeline Config
 
 ```python
-from etlplus.config import PipelineConfig
+from etlplus.workflow import PipelineConfig
 
 pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
 print(pipeline)
etlplus/{config → workflow}/__init__.py RENAMED
@@ -1,17 +1,7 @@
 """
-:mod:`etlplus.config` package.
+:mod:`etlplus.workflow` package.
 
-Configuration models and helpers for ETLPlus.
-
-This package defines models for data sources/targets ("connectors"), APIs,
-pagination/rate limits, pipeline orchestration, and related utilities. The
-parsers are permissive (accepting ``Mapping[str, Any]``) and normalize to
-concrete types without raising on unknown/optional fields.
-
-Notes
------
-- The models use ``@dataclass(slots=True)`` and avoid mutating inputs.
-- TypedDicts are editor/type-checking hints and are not enforced at runtime.
+Job workflow helpers.
 """
 
 from __future__ import annotations
@@ -21,6 +11,7 @@ from .connector import ConnectorApi
 from .connector import ConnectorDb
 from .connector import ConnectorFile
 from .connector import parse_connector
+from .dag import topological_sort_jobs
 from .jobs import ExtractRef
 from .jobs import JobConfig
 from .jobs import LoadRef
@@ -28,29 +19,25 @@ from .jobs import TransformRef
 from .jobs import ValidationRef
 from .pipeline import PipelineConfig
 from .pipeline import load_pipeline_config
-from .profile import ProfileConfig
-from .types import ConnectorType
 
 # SECTION: EXPORTS ========================================================== #
 
 
 __all__ = [
-    # Connectors
-    'Connector',
-    'ConnectorType',
+    # Data Classes
     'ConnectorApi',
     'ConnectorDb',
     'ConnectorFile',
-    'parse_connector',
-    # Jobs / Refs
     'ExtractRef',
     'JobConfig',
     'LoadRef',
+    'PipelineConfig',
     'TransformRef',
     'ValidationRef',
-    # Pipeline
-    'PipelineConfig',
+    # Functions
     'load_pipeline_config',
-    # Profile
-    'ProfileConfig',
+    'parse_connector',
+    'topological_sort_jobs',
+    # Type Aliases
+    'Connector',
 ]
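
Net effect of the rename for callers: names previously imported from `etlplus.config` now come from `etlplus.workflow`, with the DAG helper `topological_sort_jobs` re-exported alongside them, while `ProfileConfig` and `ConnectorType` drop out of the package's `__all__`. A migration sketch:

```python
# Before (0.14.3):
# from etlplus.config import PipelineConfig, load_pipeline_config

# After (0.15.2): the same names, plus the newly re-exported DAG helper.
from etlplus.workflow import (
    PipelineConfig,
    load_pipeline_config,
    parse_connector,
    topological_sort_jobs,
)
```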