etlplus 0.16.0__py3-none-any.whl → 0.16.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/ops/run.py CHANGED
@@ -6,31 +6,23 @@ A module for running ETL jobs defined in YAML configurations.
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
- from collections.abc import Mapping
10
9
  from typing import Any
11
10
  from typing import Final
12
11
  from typing import cast
13
- from urllib.parse import urlsplit
14
- from urllib.parse import urlunsplit
15
12
 
16
- from ..api import EndpointClient # noqa: F401 (re-exported for tests)
17
13
  from ..api import HttpMethod
18
- from ..api import PaginationConfigMap
19
- from ..api import RequestOptions
20
- from ..api import compose_api_request_env
21
- from ..api import compose_api_target_env
22
- from ..api import paginate_with_client
23
14
  from ..connector import DataConnectorType
24
15
  from ..file import FileFormat
16
+ from ..ops.types import PipelineConfig
25
17
  from ..types import JSONData
26
18
  from ..types import JSONDict
27
- from ..types import PipelineConfig
28
19
  from ..types import StrPath
29
- from ..types import Timeout
30
20
  from ..utils import print_json
31
21
  from ..workflow import load_pipeline_config
32
22
  from .extract import extract
23
+ from .extract import extract_from_api_source
33
24
  from .load import load
25
+ from .load import load_to_api_target
34
26
  from .transform import transform
35
27
  from .utils import maybe_validate
36
28
  from .validate import validate
@@ -54,6 +46,75 @@ DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
54
46
  # SECTION: INTERNAL FUNCTIONS =============================================== #
55
47
 
56
48
 
49
+ def _index_connectors(
50
+ connectors: list[Any],
51
+ *,
52
+ label: str,
53
+ ) -> dict[str, Any]:
54
+ """
55
+ Index connectors by name with a helpful error on duplicates.
56
+
57
+ Parameters
58
+ ----------
59
+ connectors : list[Any]
60
+ Connector objects to index.
61
+ label : str
62
+ Label used in error messages (e.g., ``"source"``).
63
+
64
+ Returns
65
+ -------
66
+ dict[str, Any]
67
+ Mapping of connector names to connector objects.
68
+
69
+ Raises
70
+ ------
71
+ ValueError
72
+ If duplicate connector names are found.
73
+ """
74
+ indexed: dict[str, Any] = {}
75
+ for connector in connectors:
76
+ name = getattr(connector, 'name', None)
77
+ if not isinstance(name, str) or not name:
78
+ continue
79
+ if name in indexed:
80
+ raise ValueError(f'Duplicate {label} connector name: {name}')
81
+ indexed[name] = connector
82
+ return indexed
83
+
84
+
85
+ def _require_named_connector(
86
+ connectors: dict[str, Any],
87
+ name: str,
88
+ *,
89
+ label: str,
90
+ ) -> Any:
91
+ """
92
+ Return a connector by name or raise a helpful error.
93
+
94
+ Parameters
95
+ ----------
96
+ connectors : dict[str, Any]
97
+ Mapping of connector names to connector objects.
98
+ name : str
99
+ Connector name to retrieve.
100
+ label : str
101
+ Label used in error messages (e.g., ``"source"``).
102
+
103
+ Returns
104
+ -------
105
+ Any
106
+ Connector object.
107
+
108
+ Raises
109
+ ------
110
+ ValueError
111
+ If the connector name is not found.
112
+ """
113
+ if name not in connectors:
114
+ raise ValueError(f'Unknown {label}: {name}')
115
+ return connectors[name]
116
+
117
+
57
118
  def _resolve_validation_config(
58
119
  job_obj: Any,
59
120
  cfg: Any,
@@ -122,16 +183,18 @@ def run(
122
183
  raise ValueError(f'Job not found: {job}')
123
184
 
124
185
  # Index sources/targets by name
125
- sources_by_name = {getattr(s, 'name', None): s for s in cfg.sources}
126
- targets_by_name = {getattr(t, 'name', None): t for t in cfg.targets}
186
+ sources_by_name = _index_connectors(cfg.sources, label='source')
187
+ targets_by_name = _index_connectors(cfg.targets, label='target')
127
188
 
128
189
  # Extract.
129
190
  if not job_obj.extract:
130
191
  raise ValueError('Job missing "extract" section')
131
192
  source_name = job_obj.extract.source
132
- if source_name not in sources_by_name:
133
- raise ValueError(f'Unknown source: {source_name}')
134
- source_obj = sources_by_name[source_name]
193
+ source_obj = _require_named_connector(
194
+ sources_by_name,
195
+ source_name,
196
+ label='source',
197
+ )
135
198
  ex_opts: dict[str, Any] = job_obj.extract.options or {}
136
199
 
137
200
  data: Any
@@ -151,68 +214,7 @@ def run(
151
214
  conn = getattr(source_obj, 'connection_string', '')
152
215
  data = extract('database', conn)
153
216
  case DataConnectorType.API:
154
- env = compose_api_request_env(cfg, source_obj, ex_opts)
155
- if (
156
- env.get('use_endpoints')
157
- and env.get('base_url')
158
- and env.get('endpoints_map')
159
- and env.get('endpoint_key')
160
- ):
161
- # Construct client using module-level EndpointClient so tests
162
- # can monkeypatch this class on etlplus.ops.run.
163
- ClientClass = EndpointClient # noqa: N806
164
- client = ClientClass(
165
- base_url=cast(str, env.get('base_url')),
166
- base_path=cast(str | None, env.get('base_path')),
167
- endpoints=cast(
168
- dict[str, str],
169
- env.get('endpoints_map', {}),
170
- ),
171
- retry=env.get('retry'),
172
- retry_network_errors=bool(
173
- env.get('retry_network_errors', False),
174
- ),
175
- session=env.get('session'),
176
- )
177
- data = paginate_with_client(
178
- client,
179
- cast(str, env.get('endpoint_key')),
180
- env.get('params'),
181
- env.get('headers'),
182
- env.get('timeout'),
183
- env.get('pagination'),
184
- cast(float | None, env.get('sleep_seconds')),
185
- )
186
- else:
187
- url = env.get('url')
188
- if not url:
189
- raise ValueError('API source missing URL')
190
- parts = urlsplit(cast(str, url))
191
- base = urlunsplit((parts.scheme, parts.netloc, '', '', ''))
192
- ClientClass = EndpointClient # noqa: N806
193
- client = ClientClass(
194
- base_url=base,
195
- base_path=None,
196
- endpoints={},
197
- retry=env.get('retry'),
198
- retry_network_errors=bool(
199
- env.get('retry_network_errors', False),
200
- ),
201
- session=env.get('session'),
202
- )
203
-
204
- request_options = RequestOptions(
205
- params=cast(Mapping[str, Any] | None, env.get('params')),
206
- headers=cast(Mapping[str, str] | None, env.get('headers')),
207
- timeout=cast(Timeout | None, env.get('timeout')),
208
- )
209
-
210
- data = client.paginate_url(
211
- cast(str, url),
212
- cast(PaginationConfigMap | None, env.get('pagination')),
213
- request=request_options,
214
- sleep_seconds=cast(float, env.get('sleep_seconds', 0.0)),
215
- )
217
+ data = extract_from_api_source(cfg, source_obj, ex_opts)
216
218
  case _:
217
219
  # :meth:`coerce` already raises for invalid connector types, but
218
220
  # keep explicit guard for defensive programming.
@@ -256,9 +258,11 @@ def run(
256
258
  if not job_obj.load:
257
259
  raise ValueError('Job missing "load" section')
258
260
  target_name = job_obj.load.target
259
- if target_name not in targets_by_name:
260
- raise ValueError(f'Unknown target: {target_name}')
261
- target_obj = targets_by_name[target_name]
261
+ target_obj = _require_named_connector(
262
+ targets_by_name,
263
+ target_name,
264
+ label='target',
265
+ )
262
266
  overrides = job_obj.load.overrides or {}
263
267
 
264
268
  ttype_raw = getattr(target_obj, 'type', None)
@@ -274,26 +278,7 @@ def run(
274
278
  raise ValueError('File target missing "path"')
275
279
  result = load(data, 'file', path, file_format=fmt)
276
280
  case DataConnectorType.API:
277
- env_t = compose_api_target_env(cfg, target_obj, overrides)
278
- url_t = env_t.get('url')
279
- if not url_t:
280
- raise ValueError('API target missing "url"')
281
- kwargs_t: dict[str, Any] = {}
282
- headers = env_t.get('headers')
283
- if headers:
284
- kwargs_t['headers'] = cast(dict[str, str], headers)
285
- if env_t.get('timeout') is not None:
286
- kwargs_t['timeout'] = env_t.get('timeout')
287
- session = env_t.get('session')
288
- if session is not None:
289
- kwargs_t['session'] = session
290
- result = load(
291
- data,
292
- 'api',
293
- cast(str, url_t),
294
- method=cast(str | Any, env_t.get('method') or 'post'),
295
- **kwargs_t,
296
- )
281
+ result = load_to_api_target(cfg, target_obj, overrides, data)
297
282
  case DataConnectorType.DATABASE:
298
283
  conn = overrides.get('connection_string') or getattr(
299
284
  target_obj,
etlplus/ops/transform.py CHANGED
@@ -44,28 +44,28 @@ from collections.abc import Sequence
44
44
  from typing import Any
45
45
  from typing import cast
46
46
 
47
- from ..enums import AggregateName
48
- from ..enums import OperatorName
49
- from ..enums import PipelineStep
50
- from ..types import AggregateFunc
51
- from ..types import AggregateSpec
52
- from ..types import FieldName
53
- from ..types import Fields
54
- from ..types import FilterSpec
47
+ from ..ops.types import PipelineConfig
55
48
  from ..types import JSONData
56
49
  from ..types import JSONDict
57
50
  from ..types import JSONList
58
- from ..types import MapSpec
59
- from ..types import OperatorFunc
60
- from ..types import PipelineConfig
61
- from ..types import PipelineStepName
62
- from ..types import SortKey
63
- from ..types import StepApplier
64
- from ..types import StepOrSteps
65
- from ..types import StepSpec
66
51
  from ..types import StrPath
67
52
  from ..utils import to_number
53
+ from .enums import AggregateName
54
+ from .enums import OperatorName
55
+ from .enums import PipelineStep
68
56
  from .load import load_data
57
+ from .types import AggregateFunc
58
+ from .types import AggregateSpec
59
+ from .types import FieldName
60
+ from .types import Fields
61
+ from .types import FilterSpec
62
+ from .types import MapSpec
63
+ from .types import OperatorFunc
64
+ from .types import PipelineStepName
65
+ from .types import SortKey
66
+ from .types import StepApplier
67
+ from .types import StepOrSteps
68
+ from .types import StepSpec
69
69
 
70
70
  # SECTION: EXPORTS ========================================================== #
71
71
 
@@ -206,15 +206,12 @@ def _normalize_specs(
206
206
  """
207
207
  if config is None:
208
208
  return []
209
- if isinstance(config, Sequence) and not isinstance(
210
- config,
211
- (str, bytes, bytearray),
212
- ):
209
+ if _is_sequence_not_text(config):
213
210
  # Already a sequence of step specs; normalize to a list.
214
- return list(config) # type: ignore[list-item]
211
+ return list(cast(Sequence[StepSpec], config))
215
212
 
216
213
  # Single spec
217
- return [config]
214
+ return [cast(StepSpec, config)]
218
215
 
219
216
 
220
217
  def _normalize_operation_keys(ops: Mapping[Any, Any]) -> dict[str, Any]:
@@ -702,7 +699,31 @@ def _apply_sort_step(
702
699
  # -- Helpers -- #
703
700
 
704
701
 
705
- def _is_plain_fields_list(obj: Any) -> bool:
702
+ def _is_sequence_not_text(
703
+ obj: Any,
704
+ ) -> bool:
705
+ """
706
+ Return ``True`` for non-text sequences.
707
+
708
+ Parameters
709
+ ----------
710
+ obj : Any
711
+ The object to check.
712
+
713
+ Returns
714
+ -------
715
+ bool
716
+ ``True`` when *obj* is a non-text sequence.
717
+ """
718
+ return isinstance(obj, Sequence) and not isinstance(
719
+ obj,
720
+ (str, bytes, bytearray),
721
+ )
722
+
723
+
724
+ def _is_plain_fields_list(
725
+ obj: Any,
726
+ ) -> bool:
706
727
  """
707
728
  Return True if obj is a non-text sequence of non-mapping items.
708
729
 
@@ -719,10 +740,8 @@ def _is_plain_fields_list(obj: Any) -> bool:
719
740
  True if obj is a non-text sequence of non-mapping items, False
720
741
  otherwise.
721
742
  """
722
- return (
723
- isinstance(obj, Sequence)
724
- and not isinstance(obj, (str, bytes, bytearray))
725
- and not any(isinstance(x, Mapping) for x in obj)
743
+ return _is_sequence_not_text(obj) and not any(
744
+ isinstance(x, Mapping) for x in obj
726
745
  )
727
746
 
728
747
 
etlplus/ops/types.py ADDED
@@ -0,0 +1,147 @@
1
+ """
2
+ :mod:`etlplus.ops.types` module.
3
+
4
+ Shared type aliases leveraged across :mod:`etlplus.ops` modules.
5
+
6
+ Notes
7
+ -----
8
+ - Centralizes ops-focused aliases (functions, specs, and pipeline helpers).
9
+ - Relies on Python 3.13 ``type`` statements for readability and IDE support.
10
+
11
+ Examples
12
+ --------
13
+ >>> from etlplus.ops.types import AggregateFunc, OperatorFunc
14
+ >>> def total(xs: list[float], _: int) -> float:
15
+ ... return sum(xs)
16
+ >>> agg: AggregateFunc = total
17
+ >>> op: OperatorFunc = lambda a, b: a == b
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from collections.abc import Callable
23
+ from collections.abc import Mapping
24
+ from collections.abc import Sequence
25
+ from typing import Any
26
+ from typing import Literal
27
+
28
+ from ..types import JSONList
29
+ from ..types import StrAnyMap
30
+ from ..types import StrSeqMap
31
+ from ..types import StrStrMap
32
+
33
+ # SECTION: EXPORTS ========================================================== #
34
+
35
+
36
+ __all__ = [
37
+ # Type Aliases (Functions)
38
+ 'AggregateFunc',
39
+ 'OperatorFunc',
40
+ # Type Aliases (Records & Fields)
41
+ 'FieldName',
42
+ 'Fields',
43
+ # Type Aliases (Transform Specs)
44
+ 'AggregateSpec',
45
+ 'FilterSpec',
46
+ 'MapSpec',
47
+ 'SelectSpec',
48
+ 'SortSpec',
49
+ # Type Aliases (Pipelines)
50
+ 'StepOrSteps',
51
+ 'StepSeq',
52
+ 'StepSpec',
53
+ 'PipelineConfig',
54
+ 'PipelineStepName',
55
+ # Type Aliases (Helpers)
56
+ 'StepApplier',
57
+ 'SortKey',
58
+ ]
59
+
60
+
61
+ # SECTION: TYPE ALIASES ===================================================== #
62
+
63
+
64
+ # -- Functions -- #
65
+
66
+
67
+ # TODO: Consider redefining to use `functools.reduce` signature.
68
+ # TODO: Consider adding `**kwargs` to support richer aggregation functions.
69
+ # TODO: Consider constraining first argument to `Sequence[float]`.
70
+ # TODO: Consider constraining return type to `float | int | None`.
71
+ # Callable reducing numeric collections into a summary value.
72
+ type AggregateFunc = Callable[[list[float], int], Any]
73
+
74
+ # Binary predicate consumed by filter operations.
75
+ type OperatorFunc = Callable[[Any, Any], bool]
76
+
77
+ # -- Records & Fields -- #
78
+
79
+ # Individual field identifier referenced inside specs.
80
+ type FieldName = str
81
+
82
+ # Ordered list of :data:`FieldName` entries preserving projection order.
83
+ type Fields = list[FieldName]
84
+
85
+ # -- Transform Specs -- #
86
+
87
+ # Filtering spec expecting ``field``, ``op``, and ``value`` keys.
88
+ type FilterSpec = StrAnyMap
89
+
90
+ # Field renaming instructions mapping old keys to new ones.
91
+ type MapSpec = StrStrMap
92
+
93
+ # Projection spec as a field list or mapping with metadata.
94
+ #
95
+ # Examples
96
+ # --------
97
+ # >>> from etlplus.ops.types import SelectSpec
98
+ # >>> spec1: SelectSpec = ['a','b']
99
+ # >>> spec2: SelectSpec = {'fields': [...]}
100
+ type SelectSpec = Fields | StrSeqMap
101
+
102
+ # Sort directive expressed as a field string or mapping with flags.
103
+ #
104
+ # Examples
105
+ # --------
106
+ # >>> from etlplus.ops.types import SortSpec
107
+ # >>> spec1: SortSpec = 'field'
108
+ # >>> spec2: SortSpec = {'field': 'x', 'reverse': True}
109
+ type SortSpec = str | StrAnyMap
110
+
111
+ # Aggregate instruction covering ``field``, ``func``, and optional alias.
112
+ #
113
+ # Supported functions: ``avg``, ``count``, ``max``, ``min``, and ``sum``.
114
+ # Examples
115
+ # --------
116
+ # >>> from etlplus.ops.types import AggregateSpec
117
+ # >>> spec: AggregateSpec = \
118
+ # ... {'field': 'x', 'func': 'sum' | 'avg' | ..., 'alias'?: '...'}
119
+ type AggregateSpec = StrAnyMap
120
+
121
+ # -- Pipelines-- #
122
+
123
+ # Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
124
+ type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
125
+
126
+ # Collections of steps
127
+
128
+ # Ordered collection of :data:`StepSpec` entries.
129
+ type StepSeq = Sequence[StepSpec]
130
+
131
+ # Accepts either a single :data:`StepSpec` or a sequence of them.
132
+ type StepOrSteps = StepSpec | StepSeq
133
+
134
+ # Canonical literal names for supported transform stages.
135
+ type PipelineStepName = Literal['aggregate', 'filter', 'map', 'select', 'sort']
136
+
137
+ # Mapping from step name to its associated specification payload.
138
+ # TODO: Consider replacing with etlplus.workflow.types.PipelineConfig.
139
+ type PipelineConfig = Mapping[PipelineStepName, StepOrSteps]
140
+
141
+ # -- Helpers -- #
142
+
143
+ # Callable that applies step configuration to a batch of records.
144
+ type StepApplier = Callable[[JSONList, Any], JSONList]
145
+
146
+ # Tuple combining stable sort index and computed sort value.
147
+ type SortKey = tuple[int, Any]
etlplus/types.py CHANGED
@@ -12,8 +12,7 @@ Notes
12
12
  See Also
13
13
  --------
14
14
  - :mod:`etlplus.api.types` for HTTP-specific aliases and data classes
15
- - :mod:`etlplus.connector.types` for connector-specific aliases and TypedDict
16
- surfaces
15
+ - :mod:`etlplus.connector.types` for connector-specific aliases
17
16
 
18
17
  Examples
19
18
  --------
@@ -54,33 +53,15 @@ __all__ = [
54
53
  'JSONRecords',
55
54
  # Type Aliases (File System)
56
55
  'StrPath',
57
- # Type Aliases (Functions)
58
- 'AggregateFunc',
59
- 'OperatorFunc',
60
- # Type Aliases (Records & Fields)
61
- 'FieldName',
62
- 'Fields',
63
56
  # Type Aliases (Transform Specs)
64
57
  'StrAnyMap',
65
58
  'StrSeqMap',
66
59
  'StrStrMap',
67
- 'AggregateSpec',
68
- 'FilterSpec',
69
- 'MapSpec',
70
- 'SelectSpec',
71
- 'SortSpec',
72
- # Type Aliases (Pipelines)
73
- 'StepOrSteps',
74
- 'StepSeq',
75
- 'StepSpec',
76
- 'PipelineStepName',
77
- 'PipelineConfig',
78
- # Type Aliases (Helpers)
79
- 'StepApplier',
80
- 'SortKey',
81
60
  # Type Aliases (Networking / Runtime)
82
61
  'Sleeper',
83
62
  'Timeout',
63
+ # Type Aliases (Templates)
64
+ 'TemplateKey',
84
65
  ]
85
66
 
86
67
 
@@ -125,22 +106,6 @@ type JSONRecords = list[JSONRecord]
125
106
  # Path-like inputs accepted by file helpers.
126
107
  type StrPath = str | Path | PathLike[str]
127
108
 
128
- # -- Functions -- #
129
-
130
- # Callable reducing numeric collections into a summary value.
131
- type AggregateFunc = Callable[[list[float], int], Any]
132
-
133
- # Binary predicate consumed by filter operations.
134
- type OperatorFunc = Callable[[Any, Any], bool]
135
-
136
- # -- Records & Fields -- #
137
-
138
- # Individual field identifier referenced inside specs.
139
- type FieldName = str
140
-
141
- # Ordered list of :data:`FieldName` entries preserving projection order.
142
- type Fields = list[FieldName]
143
-
144
109
  # -- Transform Specs -- #
145
110
 
146
111
  # Kept intentionally broad for runtime-friendly validation in transform.py.
@@ -156,69 +121,6 @@ type StrStrMap = Mapping[str, str]
156
121
  # Mapping whose values are homogeneous sequences.
157
122
  type StrSeqMap = Mapping[str, Sequence[Any]]
158
123
 
159
- # Transform step specifications
160
-
161
- # Filtering spec expecting ``field``, ``op``, and ``value`` keys.
162
- type FilterSpec = StrAnyMap
163
-
164
- # Field renaming instructions mapping old keys to new ones.
165
- type MapSpec = StrStrMap
166
-
167
- # Projection spec as a field list or mapping with metadata.
168
- #
169
- # Examples
170
- # --------
171
- # >>> from etlplus.types import SelectSpec
172
- # >>> spec1: SelectSpec = ['a','b']
173
- # >>> spec2: SelectSpec = {'fields': [...]}
174
- type SelectSpec = Fields | StrSeqMap
175
-
176
- # Sort directive expressed as a field string or mapping with flags.
177
- #
178
- # Examples
179
- # --------
180
- # >>> from etlplus.types import SortSpec
181
- # >>> spec1: SortSpec = 'field'
182
- # >>> spec2: SortSpec = {'field': 'x', 'reverse': True}
183
- type SortSpec = str | StrAnyMap
184
-
185
- # Aggregate instruction covering ``field``, ``func``, and optional alias.
186
- #
187
- # Supported functions: ``avg``, ``count``, ``max``, ``min``, and ``sum``.
188
- # Examples
189
- # --------
190
- # >>> from etlplus.types import AggregateSpec
191
- # >>> spec: AggregateSpec = \
192
- # ... {'field': 'x', 'func': 'sum' | 'avg' | ..., 'alias'?: '...'}
193
- type AggregateSpec = StrAnyMap
194
-
195
- # -- Pipelines-- #
196
-
197
- # Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
198
- type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
199
-
200
- # Collections of steps
201
-
202
- # Ordered collection of :data:`StepSpec` entries.
203
- type StepSeq = Sequence[StepSpec]
204
-
205
- # Accepts either a single :data:`StepSpec` or a sequence of them.
206
- type StepOrSteps = StepSpec | StepSeq
207
-
208
- # Canonical literal names for supported transform stages.
209
- type PipelineStepName = Literal['filter', 'map', 'select', 'sort', 'aggregate']
210
-
211
- # Mapping from step name to its associated specification payload.
212
- type PipelineConfig = Mapping[PipelineStepName, StepOrSteps]
213
-
214
- # -- Helpers -- #
215
-
216
- # Callable that applies step configuration to a batch of records.
217
- type StepApplier = Callable[[JSONList, Any], JSONList]
218
-
219
- # Tuple combining stable sort index and computed sort value.
220
- type SortKey = tuple[int, Any]
221
-
222
124
  # -- Networking / Runtime -- #
223
125
 
224
126
  # Sleep function used by retry helpers.
etlplus/workflow/__init__.py CHANGED
@@ -14,6 +14,7 @@ from .jobs import TransformRef
14
14
  from .jobs import ValidationRef
15
15
  from .pipeline import PipelineConfig
16
16
  from .pipeline import load_pipeline_config
17
+ from .profile import ProfileConfig
17
18
 
18
19
  # SECTION: EXPORTS ========================================================== #
19
20
 
@@ -24,6 +25,7 @@ __all__ = [
24
25
  'JobConfig',
25
26
  'LoadRef',
26
27
  'PipelineConfig',
28
+ 'ProfileConfig',
27
29
  'TransformRef',
28
30
  'ValidationRef',
29
31
  # Functions
etlplus/workflow/dag.py CHANGED
@@ -47,6 +47,28 @@ class DagError(ValueError):
47
47
  return self.message
48
48
 
49
49
 
50
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
51
+
52
+
53
+ def _ready(
54
+ indegree: dict[str, int],
55
+ ) -> list[str]:
56
+ """
57
+ Return a sorted list of nodes with zero indegree.
58
+
59
+ Parameters
60
+ ----------
61
+ indegree : dict[str, int]
62
+ Mapping of node name to indegree.
63
+
64
+ Returns
65
+ -------
66
+ list[str]
67
+ Sorted list of node names ready to process.
68
+ """
69
+ return sorted(name for name, deg in indegree.items() if deg == 0)
70
+
71
+
50
72
  # SECTION: FUNCTIONS ======================================================== #
51
73
 
52
74
 
@@ -88,7 +110,7 @@ def topological_sort_jobs(
88
110
  edges[dep].add(job.name)
89
111
  indegree[job.name] += 1
90
112
 
91
- queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
113
+ queue = deque(_ready(indegree))
92
114
  ordered: list[str] = []
93
115
 
94
116
  while queue: