etlplus 0.12.13__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/config/jobs.py +14 -4
- etlplus/dag.py +103 -0
- {etlplus-0.12.13.dist-info → etlplus-0.13.0.dist-info}/METADATA +1 -1
- {etlplus-0.12.13.dist-info → etlplus-0.13.0.dist-info}/RECORD +8 -7
- {etlplus-0.12.13.dist-info → etlplus-0.13.0.dist-info}/WHEEL +0 -0
- {etlplus-0.12.13.dist-info → etlplus-0.13.0.dist-info}/entry_points.txt +0 -0
- {etlplus-0.12.13.dist-info → etlplus-0.13.0.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.12.13.dist-info → etlplus-0.13.0.dist-info}/top_level.txt +0 -0
etlplus/config/jobs.py
CHANGED
|
@@ -34,10 +34,7 @@ __all__ = [
|
|
|
34
34
|
]
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
# SECTION:
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
# SECTION: CLASSES ========================================================== #
|
|
37
|
+
# SECTION: DATA CLASSES ===================================================== #
|
|
41
38
|
|
|
42
39
|
|
|
43
40
|
@dataclass(kw_only=True, slots=True)
|
|
@@ -100,6 +97,8 @@ class JobConfig:
|
|
|
100
97
|
Unique job name.
|
|
101
98
|
description : str | None
|
|
102
99
|
Optional human-friendly description.
|
|
100
|
+
depends_on : list[str]
|
|
101
|
+
Optional job dependency list. Dependencies must refer to other jobs.
|
|
103
102
|
extract : ExtractRef | None
|
|
104
103
|
Extraction reference.
|
|
105
104
|
validate : ValidationRef | None
|
|
@@ -114,6 +113,7 @@ class JobConfig:
|
|
|
114
113
|
|
|
115
114
|
name: str
|
|
116
115
|
description: str | None = None
|
|
116
|
+
depends_on: list[str] = field(default_factory=list)
|
|
117
117
|
extract: ExtractRef | None = None
|
|
118
118
|
validate: ValidationRef | None = None
|
|
119
119
|
transform: TransformRef | None = None
|
|
@@ -149,9 +149,19 @@ class JobConfig:
|
|
|
149
149
|
if description is not None and not isinstance(description, str):
|
|
150
150
|
description = str(description)
|
|
151
151
|
|
|
152
|
+
depends_raw = data.get('depends_on')
|
|
153
|
+
depends_on: list[str] = []
|
|
154
|
+
if isinstance(depends_raw, str):
|
|
155
|
+
depends_on = [depends_raw]
|
|
156
|
+
elif isinstance(depends_raw, list):
|
|
157
|
+
for entry in depends_raw:
|
|
158
|
+
if isinstance(entry, str):
|
|
159
|
+
depends_on.append(entry)
|
|
160
|
+
|
|
152
161
|
return cls(
|
|
153
162
|
name=name,
|
|
154
163
|
description=description,
|
|
164
|
+
depends_on=depends_on,
|
|
155
165
|
extract=ExtractRef.from_obj(data.get('extract')),
|
|
156
166
|
validate=ValidationRef.from_obj(data.get('validate')),
|
|
157
167
|
transform=TransformRef.from_obj(data.get('transform')),
|
etlplus/dag.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.dag` module.
|
|
3
|
+
|
|
4
|
+
Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
|
|
5
|
+
``depends_on``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections import deque
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
from .config.jobs import JobConfig
|
|
14
|
+
|
|
15
|
+
# SECTION: EXPORTS ========================================================== #
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
'DagError',
|
|
20
|
+
'topological_sort_jobs',
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# SECTION: ERRORS =========================================================== #
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(slots=True)
|
|
28
|
+
class DagError(ValueError):
|
|
29
|
+
"""
|
|
30
|
+
Raised when the job dependency graph is invalid.
|
|
31
|
+
|
|
32
|
+
Attributes
|
|
33
|
+
----------
|
|
34
|
+
message : str
|
|
35
|
+
Error message.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
# -- Attributes -- #
|
|
39
|
+
|
|
40
|
+
message: str
|
|
41
|
+
|
|
42
|
+
# -- Magic Methods (Object Representation) -- #
|
|
43
|
+
|
|
44
|
+
def __str__(self) -> str:
|
|
45
|
+
return self.message
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def topological_sort_jobs(
    jobs: list[JobConfig],
) -> list[JobConfig]:
    """
    Return jobs in topological order based on ``depends_on``.

    The ordering is deterministic: jobs without dependencies are seeded in
    sorted name order, and the dependents of each processed job are
    enqueued in sorted name order once all of their dependencies have been
    emitted.

    Parameters
    ----------
    jobs : list[JobConfig]
        List of job configurations to sort.

    Returns
    -------
    list[JobConfig]
        Jobs sorted in topological order.

    Raises
    ------
    DagError
        If a job name is duplicated, a dependency is missing or
        self-referential, or when a cycle is detected.
    """
    # Build the name lookup explicitly so that duplicate names are reported
    # as such instead of silently collapsing and later being misreported as
    # a dependency cycle.
    index: dict[str, JobConfig] = {}
    for job in jobs:
        if job.name in index:
            raise DagError(f'Duplicate job name "{job.name}"')
        index[job.name] = job

    # edges[dep] holds the jobs that must run after ``dep``; indegree[name]
    # counts the distinct dependencies ``name`` is still waiting on.
    edges: dict[str, set[str]] = {name: set() for name in index}
    indegree: dict[str, int] = {name: 0 for name in index}

    for job in jobs:
        for dep in job.depends_on:
            if dep not in index:
                raise DagError(
                    f'Unknown dependency "{dep}" in job "{job.name}"',
                )
            if dep == job.name:
                raise DagError(f'Job "{job.name}" depends on itself')
            # A dependency listed more than once must only count once.
            if job.name not in edges[dep]:
                edges[dep].add(job.name)
                indegree[job.name] += 1

    # Kahn's algorithm: repeatedly emit jobs whose dependencies are all met.
    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
    ordered: list[str] = []

    while queue:
        name = queue.popleft()
        ordered.append(name)
        for child in sorted(edges[name]):
            indegree[child] -= 1
            if indegree[child] == 0:
                queue.append(child)

    # Any job never emitted still has an unmet dependency, i.e. a cycle.
    if len(ordered) != len(index):
        raise DagError('Dependency cycle detected')

    return [index[name] for name in ordered]
|
|
@@ -2,6 +2,7 @@ etlplus/README.md,sha256=5jNes37UIy_THNmUr5HSAyS5mTCTa5tqRfEPnvsgV4s,1455
|
|
|
2
2
|
etlplus/__init__.py,sha256=M2gScnyir6WOMAh_EuoQIiAzdcTls0_5hbd_Q6of8I0,1021
|
|
3
3
|
etlplus/__main__.py,sha256=btoROneNiigyfBU7BSzPKZ1R9gzBMpxcpsbPwmuHwTM,479
|
|
4
4
|
etlplus/__version__.py,sha256=1E0GMK_yUWCMQFKxXjTvyMwofi0qT2k4CDNiHWiymWE,327
|
|
5
|
+
etlplus/dag.py,sha256=4EYmBsJax3y4clHv10jjdp3GrBBD_WblvtxUb_JxGCQ,2464
|
|
5
6
|
etlplus/enums.py,sha256=WyxpUEUPdYdXlueKDXGaSEo7o9OqCXyzjDOOPqmW8tw,8326
|
|
6
7
|
etlplus/extract.py,sha256=LOyL8_KCOaIGemTxSnKbN_ttfLWUljqT4OQxANe7G3k,6089
|
|
7
8
|
etlplus/load.py,sha256=aufl-2CpuI_J1hKBY1uFsoVf9Gfl9bKQjs233dYFf00,8631
|
|
@@ -43,7 +44,7 @@ etlplus/cli/types.py,sha256=tclhKVJXDqHzlTQBYKARfqMgDOcuBJ-Zej2pvFy96WM,652
|
|
|
43
44
|
etlplus/config/README.md,sha256=ot6oFZxTz4x83mj1_FrQ13dO0z2QkRFDnkCkx7NPsSs,1636
|
|
44
45
|
etlplus/config/__init__.py,sha256=VZWzOg7d2YR9NT6UwKTv44yf2FRUMjTHynkm1Dl5Qzo,1486
|
|
45
46
|
etlplus/config/connector.py,sha256=0-TIwevHbKRHVmucvyGpPd-3tB1dKHB-dj0yJ6kq5eY,9809
|
|
46
|
-
etlplus/config/jobs.py,sha256=
|
|
47
|
+
etlplus/config/jobs.py,sha256=oa2rNwacy2b_1HP_iFDLarGnn812ZVP2k5cHt4eqBIg,7843
|
|
47
48
|
etlplus/config/pipeline.py,sha256=m4Jh0ctFcKrIx6zR7LEC0sYY5wq0o8NqOruWPlz6qmA,9494
|
|
48
49
|
etlplus/config/profile.py,sha256=Ss2zedQGjkaGSpvBLTD4SZaWViMJ7TJPLB8Q2_BTpPg,1898
|
|
49
50
|
etlplus/config/types.py,sha256=a0epJ3z16HQ5bY3Ctf8s_cQPa3f0HHcwdOcjCP2xoG4,4954
|
|
@@ -125,9 +126,9 @@ etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk
|
|
|
125
126
|
etlplus/validation/README.md,sha256=qusyiyJu2DsaK80jlwfXVZ0iDgeuTPOX2EL3a_fcFiw,1401
|
|
126
127
|
etlplus/validation/__init__.py,sha256=Pe5Xg1_EA4uiNZGYu5WTF3j7odjmyxnAJ8rcioaplSQ,1254
|
|
127
128
|
etlplus/validation/utils.py,sha256=Mtqg449VIke0ziy_wd2r6yrwJzQkA1iulZC87FzXMjo,10201
|
|
128
|
-
etlplus-0.
|
|
129
|
-
etlplus-0.
|
|
130
|
-
etlplus-0.
|
|
131
|
-
etlplus-0.
|
|
132
|
-
etlplus-0.
|
|
133
|
-
etlplus-0.
|
|
129
|
+
etlplus-0.13.0.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
|
|
130
|
+
etlplus-0.13.0.dist-info/METADATA,sha256=_L0jck50nGtiKn2XwWnpUwHd9ylP3grWBZhATo9ibLM,28104
|
|
131
|
+
etlplus-0.13.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
132
|
+
etlplus-0.13.0.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
|
|
133
|
+
etlplus-0.13.0.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
|
|
134
|
+
etlplus-0.13.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|