etlplus 0.16.0__py3-none-any.whl → 0.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. etlplus/README.md +24 -2
  2. etlplus/__init__.py +2 -0
  3. etlplus/api/__init__.py +14 -14
  4. etlplus/api/auth.py +9 -6
  5. etlplus/api/config.py +6 -6
  6. etlplus/api/endpoint_client.py +16 -16
  7. etlplus/api/enums.py +2 -2
  8. etlplus/api/errors.py +4 -4
  9. etlplus/api/pagination/__init__.py +6 -6
  10. etlplus/api/pagination/config.py +11 -9
  11. etlplus/api/rate_limiting/__init__.py +2 -2
  12. etlplus/api/rate_limiting/config.py +10 -10
  13. etlplus/api/rate_limiting/rate_limiter.py +2 -2
  14. etlplus/api/request_manager.py +4 -4
  15. etlplus/api/retry_manager.py +6 -6
  16. etlplus/api/transport.py +10 -10
  17. etlplus/api/types.py +47 -26
  18. etlplus/api/utils.py +49 -49
  19. etlplus/cli/README.md +9 -7
  20. etlplus/cli/commands.py +22 -22
  21. etlplus/cli/handlers.py +12 -13
  22. etlplus/cli/main.py +1 -1
  23. etlplus/{workflow/pipeline.py → config.py} +54 -91
  24. etlplus/connector/__init__.py +6 -6
  25. etlplus/connector/api.py +7 -7
  26. etlplus/connector/database.py +3 -3
  27. etlplus/connector/file.py +3 -3
  28. etlplus/connector/types.py +2 -2
  29. etlplus/database/README.md +7 -7
  30. etlplus/enums.py +35 -167
  31. etlplus/file/README.md +7 -5
  32. etlplus/file/accdb.py +2 -1
  33. etlplus/file/arrow.py +2 -1
  34. etlplus/file/bson.py +2 -1
  35. etlplus/file/cbor.py +2 -1
  36. etlplus/file/cfg.py +1 -1
  37. etlplus/file/conf.py +1 -1
  38. etlplus/file/dat.py +1 -1
  39. etlplus/file/dta.py +1 -1
  40. etlplus/file/duckdb.py +2 -1
  41. etlplus/file/enums.py +1 -1
  42. etlplus/file/fwf.py +2 -1
  43. etlplus/file/hbs.py +2 -1
  44. etlplus/file/hdf5.py +2 -1
  45. etlplus/file/ini.py +2 -1
  46. etlplus/file/ion.py +1 -1
  47. etlplus/file/jinja2.py +2 -1
  48. etlplus/file/log.py +1 -1
  49. etlplus/file/mat.py +1 -1
  50. etlplus/file/mdb.py +2 -1
  51. etlplus/file/msgpack.py +2 -1
  52. etlplus/file/mustache.py +2 -1
  53. etlplus/file/nc.py +1 -1
  54. etlplus/file/numbers.py +2 -1
  55. etlplus/file/ods.py +2 -1
  56. etlplus/file/pb.py +2 -1
  57. etlplus/file/pbf.py +2 -1
  58. etlplus/file/properties.py +2 -1
  59. etlplus/file/proto.py +2 -1
  60. etlplus/file/psv.py +2 -1
  61. etlplus/file/rda.py +2 -1
  62. etlplus/file/rds.py +1 -1
  63. etlplus/file/sas7bdat.py +2 -1
  64. etlplus/file/sav.py +1 -1
  65. etlplus/file/sqlite.py +2 -1
  66. etlplus/file/sylk.py +2 -1
  67. etlplus/file/tab.py +2 -1
  68. etlplus/file/toml.py +2 -1
  69. etlplus/file/vm.py +2 -1
  70. etlplus/file/wks.py +2 -1
  71. etlplus/file/xls.py +1 -1
  72. etlplus/file/xlsm.py +2 -2
  73. etlplus/file/xpt.py +2 -1
  74. etlplus/file/zsav.py +2 -1
  75. etlplus/ops/README.md +10 -9
  76. etlplus/ops/__init__.py +1 -0
  77. etlplus/ops/enums.py +173 -0
  78. etlplus/ops/extract.py +209 -22
  79. etlplus/ops/load.py +140 -34
  80. etlplus/ops/run.py +88 -103
  81. etlplus/ops/transform.py +46 -27
  82. etlplus/ops/types.py +147 -0
  83. etlplus/ops/utils.py +5 -5
  84. etlplus/ops/validate.py +13 -13
  85. etlplus/templates/README.md +11 -9
  86. etlplus/types.py +5 -102
  87. etlplus/workflow/README.md +0 -24
  88. etlplus/workflow/__init__.py +2 -4
  89. etlplus/workflow/dag.py +23 -1
  90. etlplus/workflow/jobs.py +15 -28
  91. etlplus/workflow/profile.py +4 -2
  92. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/METADATA +32 -28
  93. etlplus-0.16.7.dist-info/RECORD +143 -0
  94. etlplus-0.16.0.dist-info/RECORD +0 -141
  95. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/WHEEL +0 -0
  96. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/entry_points.txt +0 -0
  97. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/licenses/LICENSE +0 -0
  98. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/top_level.txt +0 -0
etlplus/ops/types.py ADDED
@@ -0,0 +1,147 @@
+"""
+:mod:`etlplus.ops.types` module.
+
+Shared type aliases leveraged across :mod:`etlplus.ops` modules.
+
+Notes
+-----
+- Centralizes ops-focused aliases (functions, specs, and pipeline helpers).
+- Relies on Python 3.13 ``type`` statements for readability and IDE support.
+
+Examples
+--------
+>>> from etlplus.ops.types import AggregateFunc, OperatorFunc
+>>> def total(xs: list[float], _: int) -> float:
+...     return sum(xs)
+>>> agg: AggregateFunc = total
+>>> op: OperatorFunc = lambda a, b: a == b
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from collections.abc import Mapping
+from collections.abc import Sequence
+from typing import Any
+from typing import Literal
+
+from ..types import JSONList
+from ..types import StrAnyMap
+from ..types import StrSeqMap
+from ..types import StrStrMap
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Type Aliases (Functions)
+    'AggregateFunc',
+    'OperatorFunc',
+    # Type Aliases (Records & Fields)
+    'FieldName',
+    'Fields',
+    # Type Aliases (Transform Specs)
+    'AggregateSpec',
+    'FilterSpec',
+    'MapSpec',
+    'SelectSpec',
+    'SortSpec',
+    # Type Aliases (Pipelines)
+    'StepOrSteps',
+    'StepSeq',
+    'StepSpec',
+    'PipelineConfig',
+    'PipelineStepName',
+    # Type Aliases (Helpers)
+    'StepApplier',
+    'SortKey',
+]
+
+
+# SECTION: TYPE ALIASES ===================================================== #
+
+
+# -- Functions -- #
+
+
+# TODO: Consider redefining to use `functools.reduce` signature.
+# TODO: Consider adding `**kwargs` to support richer aggregation functions.
+# TODO: Consider constraining first argument to `Sequence[float]`.
+# TODO: Consider constraining return type to `float | int | None`.
+# Callable reducing numeric collections into a summary value.
+type AggregateFunc = Callable[[list[float], int], Any]
+
+# Binary predicate consumed by filter operations.
+type OperatorFunc = Callable[[Any, Any], bool]
+
+# -- Records & Fields -- #
+
+# Individual field identifier referenced inside specs.
+type FieldName = str
+
+# Ordered list of :data:`FieldName` entries preserving projection order.
+type Fields = list[FieldName]
+
+# -- Transform Specs -- #
+
+# Filtering spec expecting ``field``, ``op``, and ``value`` keys.
+type FilterSpec = StrAnyMap
+
+# Field renaming instructions mapping old keys to new ones.
+type MapSpec = StrStrMap
+
+# Projection spec as a field list or mapping with metadata.
+#
+# Examples
+# --------
+# >>> from etlplus.ops.types import SelectSpec
+# >>> spec1: SelectSpec = ['a','b']
+# >>> spec2: SelectSpec = {'fields': [...]}
+type SelectSpec = Fields | StrSeqMap
+
+# Sort directive expressed as a field string or mapping with flags.
+#
+# Examples
+# --------
+# >>> from etlplus.ops.types import SortSpec
+# >>> spec1: SortSpec = 'field'
+# >>> spec2: SortSpec = {'field': 'x', 'reverse': True}
+type SortSpec = str | StrAnyMap
+
+# Aggregate instruction covering ``field``, ``func``, and optional alias.
+#
+# Supported functions: ``avg``, ``count``, ``max``, ``min``, and ``sum``.
+# Examples
+# --------
+# >>> from etlplus.ops.types import AggregateSpec
+# >>> spec: AggregateSpec = \
+# ...     {'field': 'x', 'func': 'sum' | 'avg' | ..., 'alias'?: '...'}
+type AggregateSpec = StrAnyMap
+
+# -- Pipelines-- #
+
+# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
+type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
+
+# Collections of steps
+
+# Ordered collection of :data:`StepSpec` entries.
+type StepSeq = Sequence[StepSpec]
+
+# Accepts either a single :data:`StepSpec` or a sequence of them.
+type StepOrSteps = StepSpec | StepSeq
+
+# Canonical literal names for supported transform stages.
+type PipelineStepName = Literal['aggregate', 'filter', 'map', 'select', 'sort']
+
+# Mapping from step name to its associated specification payload.
+# TODO: Consider replacing with etlplus.workflow.types.PipelineConfig.
+type PipelineConfig = Mapping[PipelineStepName, StepOrSteps]
+
+# -- Helpers -- #
+
+# Callable that applies step configuration to a batch of records.
+type StepApplier = Callable[[JSONList, Any], JSONList]
+
+# Tuple combining stable sort index and computed sort value.
+type SortKey = tuple[int, Any]
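The aliases above are erased at runtime (PEP 695 `type` statements), so they exist purely for type checkers and IDEs. A minimal sketch of how they compose, assuming the 0.16.7 module layout shown above; the concrete field names and operator token are illustrative, not part of the diff:

```python
from etlplus.ops.types import FilterSpec, PipelineConfig, SortSpec

# Step specs are plain mappings; the aliases only guide static analysis.
active: FilterSpec = {'field': 'active', 'op': 'eq', 'value': True}
newest_first: SortSpec = {'field': 'created_at', 'reverse': True}

# A pipeline maps canonical step names to a single spec or a sequence.
pipeline: PipelineConfig = {
    'filter': active,
    'sort': newest_first,
    'select': ['id', 'name', 'created_at'],
}
```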
etlplus/ops/utils.py CHANGED
@@ -26,7 +26,7 @@ from ..utils import normalize_choice
 # SECTION: TYPED DICTIONARIES =============================================== #


-class ValidationResult(TypedDict, total=False):
+class ValidationResultDict(TypedDict, total=False):
     """Shape returned by ``validate_fn`` callables."""

     valid: bool
@@ -44,7 +44,7 @@ type ValidationPhase = Literal['before_transform', 'after_transform']
 type ValidationWindow = Literal['before_transform', 'after_transform', 'both']
 type ValidationSeverity = Literal['warn', 'error']

-type ValidateFn = Callable[[Any, Ruleset], ValidationResult]
+type ValidateFn = Callable[[Any, Ruleset], ValidationResultDict]
 type PrintFn = Callable[[Any], None]


@@ -198,7 +198,7 @@ def maybe_validate(
         Failure severity (``"warn"`` or ``"error"``).
     validate_fn : ValidateFn
         Engine that performs validation and returns a
-        :class:`ValidationResult` instance.
+        :class:`ValidationResultDict` instance.
     print_json_fn : PrintFn
         Structured logger invoked when validation fails.

@@ -270,7 +270,7 @@ def _log_failure(
     phase: ValidationPhase,
     window: ValidationWindow,
     ruleset_name: str | None,
-    result: ValidationResult,
+    result: ValidationResultDict,
 ) -> None:
     """
     Emit a structured message describing the failed validation.
@@ -285,7 +285,7 @@ def _log_failure(
         Configured validation window.
     ruleset_name : str | None
         Name of the validation ruleset.
-    result : ValidationResult
+    result : ValidationResultDict
         Result of the failed validation.
     """
     printer(
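Because `ValidateFn` now points at `ValidationResultDict`, custom validators must return that shape. A minimal sketch of a conforming callable; `Ruleset` is stood in for by `Any` because its definition is not part of this diff:

```python
from typing import Any

from etlplus.ops.utils import ValidationResultDict


def always_valid(data: Any, ruleset: Any) -> ValidationResultDict:
    # The TypedDict is declared with total=False, so returning only the
    # ``valid`` key (the one visible in the hunk above) is well-typed.
    return {'valid': True}
```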
etlplus/ops/validate.py CHANGED
@@ -44,9 +44,9 @@ from .load import load_data


 __all__ = [
-    'FieldRules',
-    'FieldValidation',
-    'Validation',
+    'FieldRulesDict',
+    'FieldValidationDict',
+    'ValidationDict',
     'validate_field',
     'validate',
 ]
@@ -69,7 +69,7 @@ TYPE_MAP: Final[dict[str, type | tuple[type, ...]]] = {
 # SECTION: TYPED DICTS ====================================================== #


-class FieldRules(TypedDict, total=False):
+class FieldRulesDict(TypedDict, total=False):
     """
     Validation rules for a single field.

@@ -93,7 +93,7 @@ class FieldRules(TypedDict, total=False):
     enum: list[Any]


-class FieldValidation(TypedDict):
+class FieldValidationDict(TypedDict):
     """
     Validation result for a single field.

@@ -109,7 +109,7 @@ class FieldValidation(TypedDict):
     errors: list[str]


-class Validation(TypedDict):
+class ValidationDict(TypedDict):
     """
     Validation result for a complete data structure.

@@ -134,7 +134,7 @@ class Validation(TypedDict):
 # SECTION: TYPE ALIASES ===================================================== #


-type RulesMap = Mapping[str, FieldRules]
+type RulesMap = Mapping[str, FieldRulesDict]


 # SECTION: INTERNAL FUNCTIONS ============================================== #
@@ -339,8 +339,8 @@ def _validate_record(

 def validate_field(
     value: Any,
-    rules: StrAnyMap | FieldRules,
-) -> FieldValidation:
+    rules: StrAnyMap | FieldRulesDict,
+) -> FieldValidationDict:
     """
     Validate a single value against field rules.

@@ -348,14 +348,14 @@ def validate_field(
     ----------
     value : Any
         The value to validate. ``None`` is treated as missing.
-    rules : StrAnyMap | FieldRules
+    rules : StrAnyMap | FieldRulesDict
         Rule dictionary. Supported keys include ``required``, ``type``,
         ``min``, ``max``, ``minLength``, ``maxLength``, ``pattern``, and
         ``enum``.

     Returns
     -------
-    FieldValidation
+    FieldValidationDict
         Result with ``valid`` and a list of ``errors``.

     Notes
@@ -438,7 +438,7 @@ def validate_field(
 def validate(
     source: StrPath | JSONData,
     rules: RulesMap | None = None,
-) -> Validation:
+) -> ValidationDict:
     """
     Validate data against rules.

@@ -452,7 +452,7 @@ def validate(

     Returns
     -------
-    Validation
+    ValidationDict
         Structured result with keys ``valid``, ``errors``, ``field_errors``,
         and ``data``. If loading fails, ``data`` is ``None`` and an error is
         reported in ``errors``.
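The renamed TypedDicts are plain dicts at runtime, so callers only need to update annotations and imports. A short sketch against the 0.16.7 names; the rule values here are examples, using only keys documented in the hunk above:

```python
from etlplus.ops.validate import FieldRulesDict, validate_field

# Rule keys come from the docstring above (required, minLength, ...).
rules: FieldRulesDict = {'required': True, 'minLength': 3}

result = validate_field('Ada', rules)
print(result['valid'], result['errors'])  # FieldValidationDict keys
```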
etlplus/templates/README.md CHANGED
@@ -1,10 +1,11 @@
 # `etlplus.templates` Subpackage

-Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpers.
+Documentation for the `etlplus.templates` subpackage: bundled SQL/DDL templates used by the database
+helpers.

 - Provides Jinja2 templates for DDL and view generation
-- Supports templated SQL for multiple database backends
-- Includes helpers for rendering templates with schema metadata
+- Used by `etlplus.database.render_table_sql` and related helpers
+- Exposed as plain template files you can reuse with your own Jinja2 setup

 Back to project overview: see the top-level [README](../../README.md).

@@ -21,21 +22,22 @@ Back to project overview: see the top-level [README](../../README.md).

 ## Rendering Templates

-Use the helpers to render templates with your schema or table metadata:
+ETLPlus does not currently expose a `render_template` helper in this package. Use the database
+helpers instead:

 ```python
-from etlplus.templates import render_template
+from etlplus.database import render_table_sql, load_table_spec

-sql = render_template("ddl.sql.j2", schema=my_schema)
+spec = load_table_spec("schemas/users.yml")
+sql = render_table_sql(spec, template="ddl")
 ```

 ## Example: Rendering a DDL Template

 ```python
-from etlplus.templates import render_template
+from etlplus.database import render_tables_to_string

-schema = {"name": "users", "columns": [ ... ]}
-sql = render_template("ddl.sql.j2", schema=schema)
+sql = render_tables_to_string(["schemas/users.yml"], template="ddl")
 print(sql)
 ```

etlplus/types.py CHANGED
@@ -12,12 +12,12 @@ Notes
 See Also
 --------
 - :mod:`etlplus.api.types` for HTTP-specific aliases and data classes
-- :mod:`etlplus.connector.types` for connector-specific aliases and TypedDict
-  surfaces
+- :mod:`etlplus.connector.types` for connector-specific aliases

 Examples
 --------
->>> from etlplus.types import JSONDict, PipelineConfig
+>>> from etlplus.types import JSONDict
+>>> from etlplus.ops.types import PipelineConfig
 >>> payload: JSONDict = {'id': 1, 'name': 'Ada'}
 >>> isinstance(payload, dict)
 True
@@ -54,33 +54,15 @@ __all__ = [
     'JSONRecords',
     # Type Aliases (File System)
     'StrPath',
-    # Type Aliases (Functions)
-    'AggregateFunc',
-    'OperatorFunc',
-    # Type Aliases (Records & Fields)
-    'FieldName',
-    'Fields',
     # Type Aliases (Transform Specs)
     'StrAnyMap',
     'StrSeqMap',
     'StrStrMap',
-    'AggregateSpec',
-    'FilterSpec',
-    'MapSpec',
-    'SelectSpec',
-    'SortSpec',
-    # Type Aliases (Pipelines)
-    'StepOrSteps',
-    'StepSeq',
-    'StepSpec',
-    'PipelineStepName',
-    'PipelineConfig',
-    # Type Aliases (Helpers)
-    'StepApplier',
-    'SortKey',
     # Type Aliases (Networking / Runtime)
     'Sleeper',
     'Timeout',
+    # Type Aliases (Templates)
+    'TemplateKey',
 ]


@@ -125,22 +107,6 @@ type JSONRecords = list[JSONRecord]
 # Path-like inputs accepted by file helpers.
 type StrPath = str | Path | PathLike[str]

-# -- Functions -- #
-
-# Callable reducing numeric collections into a summary value.
-type AggregateFunc = Callable[[list[float], int], Any]
-
-# Binary predicate consumed by filter operations.
-type OperatorFunc = Callable[[Any, Any], bool]
-
-# -- Records & Fields -- #
-
-# Individual field identifier referenced inside specs.
-type FieldName = str
-
-# Ordered list of :data:`FieldName` entries preserving projection order.
-type Fields = list[FieldName]
-
 # -- Transform Specs -- #

 # Kept intentionally broad for runtime-friendly validation in transform.py.
@@ -156,69 +122,6 @@ type StrStrMap = Mapping[str, str]
 # Mapping whose values are homogeneous sequences.
 type StrSeqMap = Mapping[str, Sequence[Any]]

-# Transform step specifications
-
-# Filtering spec expecting ``field``, ``op``, and ``value`` keys.
-type FilterSpec = StrAnyMap
-
-# Field renaming instructions mapping old keys to new ones.
-type MapSpec = StrStrMap
-
-# Projection spec as a field list or mapping with metadata.
-#
-# Examples
-# --------
-# >>> from etlplus.types import SelectSpec
-# >>> spec1: SelectSpec = ['a','b']
-# >>> spec2: SelectSpec = {'fields': [...]}
-type SelectSpec = Fields | StrSeqMap
-
-# Sort directive expressed as a field string or mapping with flags.
-#
-# Examples
-# --------
-# >>> from etlplus.types import SortSpec
-# >>> spec1: SortSpec = 'field'
-# >>> spec2: SortSpec = {'field': 'x', 'reverse': True}
-type SortSpec = str | StrAnyMap
-
-# Aggregate instruction covering ``field``, ``func``, and optional alias.
-#
-# Supported functions: ``avg``, ``count``, ``max``, ``min``, and ``sum``.
-# Examples
-# --------
-# >>> from etlplus.types import AggregateSpec
-# >>> spec: AggregateSpec = \
-# ...     {'field': 'x', 'func': 'sum' | 'avg' | ..., 'alias'?: '...'}
-type AggregateSpec = StrAnyMap
-
-# -- Pipelines-- #
-
-# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
-type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
-
-# Collections of steps
-
-# Ordered collection of :data:`StepSpec` entries.
-type StepSeq = Sequence[StepSpec]
-
-# Accepts either a single :data:`StepSpec` or a sequence of them.
-type StepOrSteps = StepSpec | StepSeq
-
-# Canonical literal names for supported transform stages.
-type PipelineStepName = Literal['filter', 'map', 'select', 'sort', 'aggregate']
-
-# Mapping from step name to its associated specification payload.
-type PipelineConfig = Mapping[PipelineStepName, StepOrSteps]
-
-# -- Helpers -- #
-
-# Callable that applies step configuration to a batch of records.
-type StepApplier = Callable[[JSONList, Any], JSONList]
-
-# Tuple combining stable sort index and computed sort value.
-type SortKey = tuple[int, Any]
-

 # -- Networking / Runtime -- #

 # Sleep function used by retry helpers.
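For downstream code, the practical upshot is an import move rather than a behavior change: the pipeline-spec aliases now live in `etlplus.ops.types`, while the core JSON and path aliases stay in `etlplus.types`:

```python
# 0.16.0:
#     from etlplus.types import PipelineConfig, SortSpec
# 0.16.7:
from etlplus.ops.types import PipelineConfig, SortSpec
from etlplus.types import JSONDict, StrPath  # unchanged location
```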
etlplus/workflow/README.md CHANGED
@@ -12,8 +12,6 @@ Back to project overview: see the top-level [README](../../README.md).

 - [`etlplus.workflow` Subpackage](#etlplusworkflow-subpackage)
 - [Supported Configuration Types](#supported-configuration-types)
-- [Loading and Validating Configs](#loading-and-validating-configs)
-- [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
 - [See Also](#see-also)

 ## Supported Configuration Types
@@ -23,28 +21,6 @@ Back to project overview: see the top-level [README](../../README.md).
 - **Pipeline**: End-to-end pipeline configuration
 - **Profile**: User or environment-specific settings

-## Loading and Validating Configs
-
-Use the provided classes to load and validate configuration files:
-
-```python
-from etlplus.workflow import PipelineConfig
-
-cfg = PipelineConfig.from_yaml("pipeline.yml")
-```
-
-- Supports YAML and JSON formats
-- Validates against expected schema
-
-## Example: Loading a Pipeline Config
-
-```python
-from etlplus.workflow import PipelineConfig
-
-pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
-print(pipeline)
-```
-
 ## See Also

 - Top-level CLI and library usage in the main [README](../../README.md)
etlplus/workflow/__init__.py CHANGED
@@ -12,8 +12,7 @@ from .jobs import JobConfig
 from .jobs import LoadRef
 from .jobs import TransformRef
 from .jobs import ValidationRef
-from .pipeline import PipelineConfig
-from .pipeline import load_pipeline_config
+from .profile import ProfileConfig

 # SECTION: EXPORTS ========================================================== #

@@ -23,10 +22,9 @@ __all__ = [
     'ExtractRef',
     'JobConfig',
     'LoadRef',
-    'PipelineConfig',
+    'ProfileConfig',
     'TransformRef',
     'ValidationRef',
     # Functions
-    'load_pipeline_config',
     'topological_sort_jobs',
 ]
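The `etlplus.workflow` import surface changes accordingly: `ProfileConfig` is newly re-exported, while `PipelineConfig` and `load_pipeline_config` are gone. Per the file list above, `etlplus/workflow/pipeline.py` moved to `etlplus/config.py`, so that module is presumably their new home, though this diff does not show its contents:

```python
# Still importable from etlplus.workflow in 0.16.7:
from etlplus.workflow import JobConfig, ProfileConfig, topological_sort_jobs

# No longer importable from etlplus.workflow; if needed, check the moved
# module (unverified assumption based on the rename in the file list):
# from etlplus.config import PipelineConfig, load_pipeline_config
```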
etlplus/workflow/dag.py CHANGED
@@ -47,6 +47,28 @@ class DagError(ValueError):
         return self.message


+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _ready(
+    indegree: dict[str, int],
+) -> list[str]:
+    """
+    Return a sorted list of nodes with zero indegree.
+
+    Parameters
+    ----------
+    indegree : dict[str, int]
+        Mapping of node name to indegree.
+
+    Returns
+    -------
+    list[str]
+        Sorted list of node names ready to process.
+    """
+    return sorted(name for name, deg in indegree.items() if deg == 0)
+
+
 # SECTION: FUNCTIONS ======================================================== #


@@ -88,7 +110,7 @@ def topological_sort_jobs(
             edges[dep].add(job.name)
             indegree[job.name] += 1

-    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
+    queue = deque(_ready(indegree))
     ordered: list[str] = []

     while queue:
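The extracted `_ready` helper is behavior-preserving: it is exactly the expression the old `deque(...)` call inlined. Re-created standalone below to show why sorting the zero-indegree nodes makes the topological order deterministic when several jobs are ready at once:

```python
from collections import deque


def ready(indegree: dict[str, int]) -> list[str]:
    # Same expression as _ready in the diff above.
    return sorted(name for name, deg in indegree.items() if deg == 0)


indegree = {'load': 1, 'transform': 1, 'extract': 0, 'report': 0}
queue = deque(ready(indegree))
print(queue)  # deque(['extract', 'report']) -- alphabetical, repeatable
```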
etlplus/workflow/jobs.py CHANGED
@@ -6,14 +6,13 @@ transform, load).

 Notes
 -----
-- Lightweight references used inside :class:`PipelineConfig` to avoid storing
-  large nested structures.
 - All attributes are simple and optional where appropriate, keeping parsing
   tolerant.
 """

 from __future__ import annotations

+from collections.abc import Sequence
 from dataclasses import dataclass
 from dataclasses import field
 from typing import Any
@@ -76,13 +75,15 @@ def _parse_depends_on(
     """
     if isinstance(value, str):
         return [value]
-    if isinstance(value, list):
+    if isinstance(value, Sequence) and not isinstance(
+        value,
+        (str, bytes, bytearray),
+    ):
         return [entry for entry in value if isinstance(entry, str)]
     return []


 def _require_str(
-    # data: dict[str, Any],
     data: StrAnyMap,
     key: str,
 ) -> str | None:
@@ -149,13 +150,9 @@ class ExtractRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        source = _require_str(data, 'source')
-        if source is None:
+        if (source := _require_str(data, 'source')) is None:
             return None
-        return cls(
-            source=source,
-            options=coerce_dict(data.get('options')),
-        )
+        return cls(source=source, options=coerce_dict(data.get('options')))


 @dataclass(kw_only=True, slots=True)
@@ -214,18 +211,13 @@ class JobConfig:
         data = maybe_mapping(obj)
         if not data:
             return None
-        name = _require_str(data, 'name')
-        if name is None:
+        if (name := _require_str(data, 'name')) is None:
             return None

-        description = _coerce_optional_str(data.get('description'))
-
-        depends_on = _parse_depends_on(data.get('depends_on'))
-
         return cls(
             name=name,
-            description=description,
-            depends_on=depends_on,
+            description=_coerce_optional_str(data.get('description')),
+            depends_on=_parse_depends_on(data.get('depends_on')),
             extract=ExtractRef.from_obj(data.get('extract')),
             validate=ValidationRef.from_obj(data.get('validate')),
             transform=TransformRef.from_obj(data.get('transform')),
@@ -274,8 +266,7 @@ class LoadRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        target = _require_str(data, 'target')
-        if target is None:
+        if (target := _require_str(data, 'target')) is None:
             return None
         return cls(
             target=target,
@@ -321,8 +312,7 @@ class TransformRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        pipeline = _require_str(data, 'pipeline')
-        if pipeline is None:
+        if (pipeline := _require_str(data, 'pipeline')) is None:
             return None
         return cls(pipeline=pipeline)

@@ -372,13 +362,10 @@ class ValidationRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        ruleset = _require_str(data, 'ruleset')
-        if ruleset is None:
+        if (ruleset := _require_str(data, 'ruleset')) is None:
             return None
-        severity = _coerce_optional_str(data.get('severity'))
-        phase = _coerce_optional_str(data.get('phase'))
         return cls(
             ruleset=ruleset,
-            severity=severity,
-            phase=phase,
+            severity=_coerce_optional_str(data.get('severity')),
+            phase=_coerce_optional_str(data.get('phase')),
         )
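The `_parse_depends_on` change widens accepted input from `list` only to any non-string `Sequence`, so tuples from programmatic config builders now parse instead of silently yielding `[]`. A standalone mirror of the diffed logic:

```python
from collections.abc import Sequence
from typing import Any


def parse_depends_on(value: Any) -> list[str]:
    # Mirror of the widened check in the diff above.
    if isinstance(value, str):
        return [value]
    if isinstance(value, Sequence) and not isinstance(
        value,
        (str, bytes, bytearray),
    ):
        return [entry for entry in value if isinstance(entry, str)]
    return []


print(parse_depends_on(('extract', 'validate')))  # ['extract', 'validate']
print(parse_depends_on('extract'))                # ['extract']
print(parse_depends_on(b'extract'))               # [] -- bytes rejected
```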