fabricks 3.0.11__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- fabricks/__init__.py +0 -0
- fabricks/api/__init__.py +11 -0
- fabricks/api/cdc/__init__.py +6 -0
- fabricks/api/cdc/nocdc.py +3 -0
- fabricks/api/cdc/scd1.py +3 -0
- fabricks/api/cdc/scd2.py +3 -0
- fabricks/api/context.py +27 -0
- fabricks/api/core.py +4 -0
- fabricks/api/deploy.py +3 -0
- fabricks/api/exceptions.py +19 -0
- fabricks/api/extenders.py +3 -0
- fabricks/api/job_schema.py +3 -0
- fabricks/api/log.py +3 -0
- fabricks/api/masks.py +3 -0
- fabricks/api/metastore/__init__.py +10 -0
- fabricks/api/metastore/database.py +3 -0
- fabricks/api/metastore/table.py +3 -0
- fabricks/api/metastore/view.py +6 -0
- fabricks/api/notebooks/__init__.py +0 -0
- fabricks/api/notebooks/cluster.py +6 -0
- fabricks/api/notebooks/initialize.py +42 -0
- fabricks/api/notebooks/process.py +54 -0
- fabricks/api/notebooks/run.py +59 -0
- fabricks/api/notebooks/schedule.py +75 -0
- fabricks/api/notebooks/terminate.py +31 -0
- fabricks/api/parsers.py +3 -0
- fabricks/api/schedules.py +3 -0
- fabricks/api/udfs.py +3 -0
- fabricks/api/utils.py +9 -0
- fabricks/api/version.py +3 -0
- fabricks/api/views.py +6 -0
- fabricks/cdc/__init__.py +14 -0
- fabricks/cdc/base/__init__.py +4 -0
- fabricks/cdc/base/_types.py +10 -0
- fabricks/cdc/base/cdc.py +5 -0
- fabricks/cdc/base/configurator.py +223 -0
- fabricks/cdc/base/generator.py +177 -0
- fabricks/cdc/base/merger.py +110 -0
- fabricks/cdc/base/processor.py +471 -0
- fabricks/cdc/cdc.py +5 -0
- fabricks/cdc/nocdc.py +20 -0
- fabricks/cdc/scd.py +22 -0
- fabricks/cdc/scd1.py +15 -0
- fabricks/cdc/scd2.py +15 -0
- fabricks/cdc/templates/__init__.py +0 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
- fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
- fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
- fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
- fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
- fabricks/cdc/templates/filter.sql.jinja +4 -0
- fabricks/cdc/templates/filters/final.sql.jinja +4 -0
- fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
- fabricks/cdc/templates/filters/update.sql.jinja +30 -0
- fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
- fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
- fabricks/cdc/templates/merge.sql.jinja +3 -0
- fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
- fabricks/cdc/templates/queries/__init__.py +0 -0
- fabricks/cdc/templates/queries/context.sql.jinja +186 -0
- fabricks/cdc/templates/queries/final.sql.jinja +1 -0
- fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
- fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
- fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
- fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
- fabricks/cdc/templates/query.sql.jinja +15 -0
- fabricks/context/__init__.py +72 -0
- fabricks/context/_types.py +133 -0
- fabricks/context/config/__init__.py +92 -0
- fabricks/context/config/utils.py +53 -0
- fabricks/context/log.py +77 -0
- fabricks/context/runtime.py +117 -0
- fabricks/context/secret.py +103 -0
- fabricks/context/spark_session.py +82 -0
- fabricks/context/utils.py +80 -0
- fabricks/core/__init__.py +4 -0
- fabricks/core/dags/__init__.py +9 -0
- fabricks/core/dags/base.py +99 -0
- fabricks/core/dags/generator.py +157 -0
- fabricks/core/dags/log.py +12 -0
- fabricks/core/dags/processor.py +228 -0
- fabricks/core/dags/run.py +39 -0
- fabricks/core/dags/terminator.py +25 -0
- fabricks/core/dags/utils.py +54 -0
- fabricks/core/extenders.py +33 -0
- fabricks/core/job_schema.py +32 -0
- fabricks/core/jobs/__init__.py +21 -0
- fabricks/core/jobs/base/__init__.py +10 -0
- fabricks/core/jobs/base/_types.py +284 -0
- fabricks/core/jobs/base/checker.py +139 -0
- fabricks/core/jobs/base/configurator.py +306 -0
- fabricks/core/jobs/base/exception.py +85 -0
- fabricks/core/jobs/base/generator.py +447 -0
- fabricks/core/jobs/base/invoker.py +206 -0
- fabricks/core/jobs/base/job.py +5 -0
- fabricks/core/jobs/base/processor.py +249 -0
- fabricks/core/jobs/bronze.py +395 -0
- fabricks/core/jobs/get_job.py +127 -0
- fabricks/core/jobs/get_job_conf.py +152 -0
- fabricks/core/jobs/get_job_id.py +31 -0
- fabricks/core/jobs/get_jobs.py +107 -0
- fabricks/core/jobs/get_schedule.py +10 -0
- fabricks/core/jobs/get_schedules.py +32 -0
- fabricks/core/jobs/gold.py +415 -0
- fabricks/core/jobs/silver.py +373 -0
- fabricks/core/masks.py +52 -0
- fabricks/core/parsers/__init__.py +12 -0
- fabricks/core/parsers/_types.py +6 -0
- fabricks/core/parsers/base.py +95 -0
- fabricks/core/parsers/decorator.py +11 -0
- fabricks/core/parsers/get_parser.py +26 -0
- fabricks/core/parsers/utils.py +69 -0
- fabricks/core/schedules/__init__.py +14 -0
- fabricks/core/schedules/diagrams.py +21 -0
- fabricks/core/schedules/generate.py +20 -0
- fabricks/core/schedules/get_schedule.py +5 -0
- fabricks/core/schedules/get_schedules.py +9 -0
- fabricks/core/schedules/process.py +9 -0
- fabricks/core/schedules/run.py +3 -0
- fabricks/core/schedules/terminate.py +6 -0
- fabricks/core/schedules/views.py +61 -0
- fabricks/core/steps/__init__.py +4 -0
- fabricks/core/steps/_types.py +7 -0
- fabricks/core/steps/base.py +423 -0
- fabricks/core/steps/get_step.py +10 -0
- fabricks/core/steps/get_step_conf.py +26 -0
- fabricks/core/udfs.py +106 -0
- fabricks/core/views.py +41 -0
- fabricks/deploy/__init__.py +92 -0
- fabricks/deploy/masks.py +8 -0
- fabricks/deploy/notebooks.py +71 -0
- fabricks/deploy/schedules.py +10 -0
- fabricks/deploy/tables.py +82 -0
- fabricks/deploy/udfs.py +19 -0
- fabricks/deploy/utils.py +36 -0
- fabricks/deploy/views.py +509 -0
- fabricks/metastore/README.md +3 -0
- fabricks/metastore/__init__.py +5 -0
- fabricks/metastore/_types.py +65 -0
- fabricks/metastore/database.py +65 -0
- fabricks/metastore/dbobject.py +66 -0
- fabricks/metastore/pyproject.toml +20 -0
- fabricks/metastore/table.py +768 -0
- fabricks/metastore/utils.py +51 -0
- fabricks/metastore/view.py +53 -0
- fabricks/utils/__init__.py +0 -0
- fabricks/utils/_types.py +6 -0
- fabricks/utils/azure_queue.py +93 -0
- fabricks/utils/azure_table.py +154 -0
- fabricks/utils/console.py +51 -0
- fabricks/utils/fdict.py +240 -0
- fabricks/utils/helpers.py +228 -0
- fabricks/utils/log.py +236 -0
- fabricks/utils/mermaid.py +32 -0
- fabricks/utils/path.py +242 -0
- fabricks/utils/pip.py +61 -0
- fabricks/utils/pydantic.py +94 -0
- fabricks/utils/read/__init__.py +11 -0
- fabricks/utils/read/_types.py +3 -0
- fabricks/utils/read/read.py +305 -0
- fabricks/utils/read/read_excel.py +5 -0
- fabricks/utils/read/read_yaml.py +33 -0
- fabricks/utils/schema/__init__.py +7 -0
- fabricks/utils/schema/get_json_schema_for_type.py +161 -0
- fabricks/utils/schema/get_schema_for_type.py +99 -0
- fabricks/utils/spark.py +76 -0
- fabricks/utils/sqlglot.py +56 -0
- fabricks/utils/write/__init__.py +8 -0
- fabricks/utils/write/delta.py +46 -0
- fabricks/utils/write/stream.py +27 -0
- fabricks-3.0.11.dist-info/METADATA +23 -0
- fabricks-3.0.11.dist-info/RECORD +176 -0
- fabricks-3.0.11.dist-info/WHEEL +4 -0
fabricks/core/dags/processor.py (@@ -0,0 +1,228 @@)

```python
import json
import threading
import time
from multiprocessing import Process
from typing import Any, List, Union

from azure.core.exceptions import AzureError
from databricks.sdk.runtime import dbutils, spark
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

from fabricks.context import PATH_NOTEBOOKS
from fabricks.core.dags.base import BaseDags
from fabricks.core.dags.log import LOGGER
from fabricks.core.dags.run import run
from fabricks.core.jobs.base._types import TStep
from fabricks.core.steps.get_step import get_step
from fabricks.utils.azure_queue import AzureQueue
from fabricks.utils.azure_table import AzureTable


class DagProcessor(BaseDags):
    def __init__(self, schedule_id: str, schedule: str, step: Union[TStep, str], notebook: bool = True):
        self.step = get_step(step=step)
        self.schedule = schedule

        self.notebook = notebook

        self._azure_queue = None
        self._azure_table = None

        super().__init__(schedule_id=schedule_id)

    @property
    def queue(self) -> AzureQueue:
        if not self._azure_queue:
            step = self.remove_invalid_characters(str(self.step))
            self._azure_queue = AzureQueue(
                f"q{step}{self.schedule_id}",
                **self.get_connection_info(),  # type: ignore
            )
        return self._azure_queue

    @property
    def table(self) -> AzureTable:
        if not self._azure_table:
            self._azure_table = AzureTable(f"t{self.schedule_id}", **self.get_connection_info())  # type: ignore
        return self._azure_table

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        retry=retry_if_exception_type((Exception, AzureError)),
        reraise=True,
    )
    def query(self, data: Any) -> List[dict]:
        return self.table.query(data)

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        retry=retry_if_exception_type((Exception, AzureError)),
        reraise=True,
    )
    def upsert(self, data: Any) -> None:
        self.table.upsert(data)

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        retry=retry_if_exception_type((Exception, AzureError)),
        reraise=True,
    )
    def delete(self, data: Any) -> None:
        self.table.delete(data)

    def extra(self, d: dict) -> dict:
        return {
            "partition_key": self.schedule_id,
            "schedule": self.schedule,
            "schedule_id": self.schedule_id,
            "step": str(self.step),
            "job": d.get("Job"),
            "target": "table",
        }

    def send(self):
        while True:
            scheduled = self.get_scheduled()
            assert isinstance(scheduled, List)
            if len(scheduled) == 0:
                # one sentinel per worker so that every receiver thread terminates
                for _ in range(self.step.workers):
                    self.queue.send_sentinel()
                LOGGER.info("no more job to schedule", extra={"label": str(self.step)})
                break

            else:
                sorted_scheduled = sorted(scheduled, key=lambda x: x.get("Rank"))
                for s in sorted_scheduled:
                    dependencies = self.table.query(f"PartitionKey eq 'dependencies' and JobId eq '{s.get('JobId')}'")

                    if len(dependencies) == 0:
                        # no pending upstream job: hand the job over to the queue
                        s["Status"] = "waiting"
                        LOGGER.debug("waiting", extra=self.extra(s))
                        self.table.upsert(s)
                        self.queue.send(s)

            time.sleep(5)

    def receive(self):
        while True:
            response = self.queue.receive()
            if response == self.queue.sentinel:
                LOGGER.info("no more job to process", extra={"label": str(self.step)})
                break

            elif response:
                j = json.loads(response)

                j["Status"] = "starting"
                self.table.upsert(j)
                LOGGER.info("start", extra=self.extra(j))

                try:
                    if self.notebook:
                        dbutils.notebook.run(  # type: ignore
                            PATH_NOTEBOOKS.joinpath("run").get_notebook_path(),  # type: ignore
                            self.step.timeouts.job,  # type: ignore
                            {
                                "schedule_id": self.schedule_id,
                                "schedule": self.schedule,  # needed to pass schedule variables to the job
                                "step": str(self.step),
                                "job_id": j.get("JobId"),
                                "job": j.get("Job"),
                            },  # type: ignore
                        )

                    else:
                        run(
                            step=str(self.step),
                            job_id=j.get("JobId"),
                            schedule_id=self.schedule_id,
                            schedule=self.schedule,
                        )

                except Exception:
                    LOGGER.warning("fail", extra={"label": j.get("Job")})

                finally:
                    # the status is set to ok even on failure so that the DAG keeps moving;
                    # failures are surfaced through the logs
                    j["Status"] = "ok"
                    self.table.upsert(j)
                    LOGGER.info("end", extra=self.extra(j))

                    # unblock the children of this job
                    dependencies = self.table.query(f"PartitionKey eq 'dependencies' and ParentId eq '{j.get('JobId')}'")
                    self.table.delete(dependencies)

    def get_scheduled(self, convert: bool = False):
        scheduled = self.table.query(f"PartitionKey eq 'statuses' and Status eq 'scheduled' and Step eq '{self.step}'")
        if convert:
            return spark.createDataFrame(scheduled)
        else:
            return scheduled

    def _process(self):
        scheduled = self.get_scheduled()
        assert isinstance(scheduled, List)

        if len(scheduled) > 0:
            sender = threading.Thread(
                target=self.send,
                name=f"{str(self.step).capitalize()}Sender",
                args=(),
            )
            sender.start()

            receivers = []
            for i in range(self.step.workers):
                receiver = threading.Thread(
                    target=self.receive,
                    name=f"{str(self.step).capitalize()}Receiver{i}",
                    args=(),
                )
                receiver.start()
                receivers.append(receiver)

            sender.join()
            for receiver in receivers:
                receiver.join()

    def process(self):
        scheduled = self.get_scheduled()
        assert isinstance(scheduled, List)

        if len(scheduled) > 0:
            LOGGER.info("start", extra={"label": str(self.step)})

            # pass the method itself (not its result) so the work runs in a child process
            p = Process(target=self._process)
            p.start()
            p.join(timeout=self.step.timeouts.step)
            p.terminate()

            self.queue.delete()

            if p.exitcode is None:
                LOGGER.critical("timeout", extra={"label": str(self.step)})
                raise ValueError(f"{self.step} timed out")

            else:
                df = self.get_logs(str(self.step))
                self.write_logs(df)

                LOGGER.info("end", extra={"label": str(self.step)})

        else:
            LOGGER.info("no job to schedule", extra={"label": str(self.step)})

    def __str__(self) -> str:
        return f"{str(self.step)} ({self.schedule_id})"

    def __enter__(self):
        return super().__enter__()

    def __exit__(self, *args, **kwargs):
        if self._azure_queue:
            self._azure_queue.__exit__()
        if self._azure_table:
            self._azure_table.__exit__()

        return super().__exit__(*args, **kwargs)
```
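For orientation, a minimal driver sketch (not part of the wheel): `DagProcessor` defines `__enter__`/`__exit__`, so it can be used as a context manager that releases the Azure queue and table on exit. The schedule id, schedule name, and step below are placeholders.

```python
# Hypothetical driver; schedule_id, schedule and step are placeholders.
from fabricks.core.dags.processor import DagProcessor

with DagProcessor(
    schedule_id="20250101010101",
    schedule="nightly",
    step="bronze",
    notebook=False,  # run jobs in-process instead of via dbutils.notebook.run
) as processor:
    processor.process()  # one sender thread feeds step.workers receiver threads
```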
fabricks/core/dags/run.py (@@ -0,0 +1,39 @@)

```python
from typing import Optional

from fabricks.core.dags.log import LOGGER, TABLE_LOG_HANDLER
from fabricks.core.jobs import get_job
from fabricks.core.jobs.base.exception import CheckWarning, SkipRunCheckWarning


def run(step: str, job_id: str, schedule_id: str, schedule: str, notebook_id: Optional[str] = None):
    job = get_job(step=step, job_id=job_id)

    extra = {
        "partition_key": schedule_id,
        "schedule_id": schedule_id,
        "schedule": schedule,
        "step": step,
        "job": job,
        "target": "buffer",
    }
    if notebook_id is not None:
        extra["notebook_id"] = notebook_id

    LOGGER.info("running", extra=extra)

    try:
        job.run(schedule_id=schedule_id, schedule=schedule)
        LOGGER.info("done", extra=extra)

    except SkipRunCheckWarning:
        LOGGER.exception("skipped", extra=extra)

    except CheckWarning:
        LOGGER.exception("warned", extra=extra)

    except Exception as e:
        LOGGER.exception("failed", extra=extra)
        raise e

    finally:
        TABLE_LOG_HANDLER.flush()
```
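Note the handler ordering: `SkipRunCheckWarning` and `CheckWarning` are logged but swallowed, so checked or skipped jobs do not fail the DAG, while any other exception is re-raised after logging. A hedged example of invoking `run` directly, with placeholder ids:

```python
# Illustrative call; the ids are placeholders, not values from the wheel.
from fabricks.core.dags.run import run

run(
    step="silver",
    job_id="0123456789abcdef",
    schedule_id="20250101010101",
    schedule="nightly",
)
```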
fabricks/core/dags/terminator.py (@@ -0,0 +1,25 @@)

```python
from fabricks.context import SPARK
from fabricks.core.dags.base import BaseDags
from fabricks.core.dags.log import LOGGER, TABLE_LOG_HANDLER


class DagTerminator(BaseDags):
    def __init__(self, schedule_id: str):
        self.schedule_id = schedule_id
        super().__init__(schedule_id=schedule_id)

    def terminate(self):
        df = self.get_logs()
        self.write_logs(df)

        error_df = SPARK.sql("select * from {df} where status = 'failed'", df=df)
        for row in error_df.collect():
            LOGGER.error(f"{row['job']} failed (🔥)")

        TABLE_LOG_HANDLER.table.truncate_partition(self.schedule_id)

        table = self.get_table()
        table.drop()

        if not error_df.isEmpty():
            raise ValueError(f"{error_df.count()} job(s) failed")
```
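The `SPARK.sql("select * from {df} ...", df=df)` call uses PySpark's parameterized SQL, which lets a DataFrame be referenced by name inside the query string. The same selection can be expressed with the DataFrame API, as in this equivalent sketch:

```python
# Equivalent to SPARK.sql("select * from {df} where status = 'failed'", df=df)
from pyspark.sql import functions as F

error_df = df.filter(F.col("status") == "failed")
```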
fabricks/core/dags/utils.py (@@ -0,0 +1,54 @@)

```python
from typing import Optional, cast

from fabricks.context import DBUTILS, FABRICKS_STORAGE, FABRICKS_STORAGE_CREDENTIAL, IS_UNITY_CATALOG, SECRET_SCOPE
from fabricks.utils.azure_table import AzureTable


def _get_access_key_from_secret_scope(storage_account: str) -> str:
    from fabricks.context.secret import AccessKey, get_secret_from_secret_scope

    secret = get_secret_from_secret_scope(SECRET_SCOPE, f"{storage_account}-access-key")
    return cast(AccessKey, secret).key


def _get_access_key_from_os() -> Optional[str]:
    import os

    return os.environ.get("FABRICKS_ACCESS_KEY")


def get_connection_info(storage_account: str) -> dict:
    credential = None

    if not IS_UNITY_CATALOG:
        access_key = _get_access_key_from_secret_scope(storage_account)

    else:
        access_key = _get_access_key_from_os()
        if not access_key:
            access_key = _get_access_key_from_secret_scope(storage_account)

        if FABRICKS_STORAGE_CREDENTIAL:
            assert DBUTILS
            credential = DBUTILS.credentials.getServiceCredentialsProvider(FABRICKS_STORAGE_CREDENTIAL)  # type: ignore

    assert credential or access_key

    return {
        "storage_account": storage_account,
        "access_key": access_key,
        "credential": credential,
    }


def get_table():
    storage_account = FABRICKS_STORAGE.get_storage_account()

    cx = get_connection_info(storage_account)

    return AzureTable(
        "dags",
        storage_account=storage_account,
        access_key=cx["access_key"],
        credential=cx["credential"],
    )
```
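A sketch of how these helpers compose; the storage-account name is a placeholder. `get_connection_info` resolves either an access key (secret scope or the `FABRICKS_ACCESS_KEY` environment variable) or a Unity Catalog service credential, and `get_table` reuses it to open the shared `dags` table.

```python
# Hypothetical use; "mystorageaccount" is a placeholder.
from fabricks.core.dags.utils import get_connection_info, get_table

cx = get_connection_info("mystorageaccount")
assert cx["access_key"] or cx["credential"]  # at least one must resolve

dags = get_table()  # AzureTable named "dags" on the FABRICKS_STORAGE account
```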
fabricks/core/extenders.py (@@ -0,0 +1,33 @@)

```python
from importlib.util import module_from_spec, spec_from_file_location
from typing import Callable

from fabricks.context import IS_UNITY_CATALOG, PATH_EXTENDERS
from fabricks.context.log import DEFAULT_LOGGER

EXTENDERS: dict[str, Callable] = {}


def get_extender(name: str) -> Callable:
    path = PATH_EXTENDERS.joinpath(f"{name}.py")
    if not IS_UNITY_CATALOG:
        assert path.exists(), f"no valid extender found in {path.string}"
    else:
        DEFAULT_LOGGER.debug(f"could not check if extender exists ({path.string})")

    spec = spec_from_file_location(name, path.string)
    assert spec, f"no valid extender found in {path.string}"
    assert spec.loader is not None

    mod = module_from_spec(spec)
    # executing the module runs the @extender decorators in it, which register the function
    spec.loader.exec_module(mod)
    e = EXTENDERS[name]

    return e


def extender(name: str):
    def decorator(fn: Callable):
        EXTENDERS[name] = fn
        return fn

    return decorator
```
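The registry is populated by side effect: `get_extender` imports `<PATH_EXTENDERS>/<name>.py`, and executing that module runs any `@extender` decorators it contains, inserting the decorated functions into `EXTENDERS`. A minimal extender file might look like the sketch below; the name is illustrative and the DataFrame-in, DataFrame-out signature is an assumption, not taken from the wheel.

```python
# Hypothetical contents of <PATH_EXTENDERS>/add_ingest_date.py
from pyspark.sql import DataFrame
from pyspark.sql import functions as F

from fabricks.core.extenders import extender


@extender("add_ingest_date")  # must match the file name so get_extender can find it
def add_ingest_date(df: DataFrame) -> DataFrame:
    # assumed signature: take a DataFrame, return the extended DataFrame
    return df.withColumn("__ingest_date", F.current_date())
```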
fabricks/core/job_schema.py (@@ -0,0 +1,32 @@)

```python
from dataclasses import dataclass
from typing import List

from fabricks.core.jobs.base._types import JobConf
from fabricks.utils.schema import get_json_schema_for_type


def get_job_schema() -> str:
    import json

    @dataclass
    class JobWrapper:
        job: JobConf

    sc = get_json_schema_for_type(List[JobWrapper])
    defs: dict[str, dict] = sc["$defs"]
    removals = [("Job", "job_id"), ("Job", "table")]

    for key, defi in defs.items():
        for ent, prop in removals:
            if key.startswith(ent) and prop in defi["properties"]:
                req: List[str] = defi["required"]
                req.remove(prop)  # not defined in yaml
                jobprops: dict = defi["properties"]
                jobprops.pop(prop)

    j = json.dumps(sc, indent=4)
    return j


def print_job_schema():
    print(get_job_schema())
```
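Since `get_job_schema` returns the schema as a JSON string, it can be written to disk and referenced from an editor or a CI step to validate the YAML job definitions; a short sketch with an illustrative output path:

```python
# Write the generated schema to a file; the path is a placeholder.
from pathlib import Path

from fabricks.core.job_schema import get_job_schema

Path("jobs.schema.json").write_text(get_job_schema())
```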
fabricks/core/jobs/__init__.py (@@ -0,0 +1,21 @@)

```python
from fabricks.core.jobs.base import BaseJob, Bronzes, Golds, Silvers, Steps
from fabricks.core.jobs.bronze import Bronze
from fabricks.core.jobs.get_job import get_job
from fabricks.core.jobs.get_job_id import get_job_id
from fabricks.core.jobs.get_jobs import get_jobs
from fabricks.core.jobs.gold import Gold
from fabricks.core.jobs.silver import Silver

__all__ = [
    "BaseJob",
    "Bronze",
    "Bronzes",
    "get_job_id",
    "get_job",
    "get_jobs",
    "Gold",
    "Golds",
    "Silver",
    "Silvers",
    "Steps",
]
```