fabricks 3.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. fabricks/__init__.py +0 -0
  2. fabricks/api/__init__.py +11 -0
  3. fabricks/api/cdc/__init__.py +6 -0
  4. fabricks/api/cdc/nocdc.py +3 -0
  5. fabricks/api/cdc/scd1.py +3 -0
  6. fabricks/api/cdc/scd2.py +3 -0
  7. fabricks/api/context.py +27 -0
  8. fabricks/api/core.py +4 -0
  9. fabricks/api/deploy.py +3 -0
  10. fabricks/api/exceptions.py +19 -0
  11. fabricks/api/extenders.py +3 -0
  12. fabricks/api/job_schema.py +3 -0
  13. fabricks/api/log.py +3 -0
  14. fabricks/api/masks.py +3 -0
  15. fabricks/api/metastore/__init__.py +10 -0
  16. fabricks/api/metastore/database.py +3 -0
  17. fabricks/api/metastore/table.py +3 -0
  18. fabricks/api/metastore/view.py +6 -0
  19. fabricks/api/notebooks/__init__.py +0 -0
  20. fabricks/api/notebooks/cluster.py +6 -0
  21. fabricks/api/notebooks/initialize.py +42 -0
  22. fabricks/api/notebooks/process.py +54 -0
  23. fabricks/api/notebooks/run.py +59 -0
  24. fabricks/api/notebooks/schedule.py +75 -0
  25. fabricks/api/notebooks/terminate.py +31 -0
  26. fabricks/api/parsers.py +3 -0
  27. fabricks/api/schedules.py +3 -0
  28. fabricks/api/udfs.py +3 -0
  29. fabricks/api/utils.py +9 -0
  30. fabricks/api/version.py +3 -0
  31. fabricks/api/views.py +6 -0
  32. fabricks/cdc/__init__.py +14 -0
  33. fabricks/cdc/base/__init__.py +4 -0
  34. fabricks/cdc/base/_types.py +10 -0
  35. fabricks/cdc/base/cdc.py +5 -0
  36. fabricks/cdc/base/configurator.py +223 -0
  37. fabricks/cdc/base/generator.py +177 -0
  38. fabricks/cdc/base/merger.py +110 -0
  39. fabricks/cdc/base/processor.py +471 -0
  40. fabricks/cdc/cdc.py +5 -0
  41. fabricks/cdc/nocdc.py +20 -0
  42. fabricks/cdc/scd.py +22 -0
  43. fabricks/cdc/scd1.py +15 -0
  44. fabricks/cdc/scd2.py +15 -0
  45. fabricks/cdc/templates/__init__.py +0 -0
  46. fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
  47. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  48. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  49. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  50. fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
  51. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  52. fabricks/cdc/templates/filter.sql.jinja +4 -0
  53. fabricks/cdc/templates/filters/final.sql.jinja +4 -0
  54. fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
  55. fabricks/cdc/templates/filters/update.sql.jinja +30 -0
  56. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  57. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  58. fabricks/cdc/templates/merge.sql.jinja +3 -0
  59. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  60. fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
  61. fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
  62. fabricks/cdc/templates/queries/__init__.py +0 -0
  63. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  64. fabricks/cdc/templates/queries/final.sql.jinja +1 -0
  65. fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
  66. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
  67. fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
  68. fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
  69. fabricks/cdc/templates/query.sql.jinja +15 -0
  70. fabricks/context/__init__.py +72 -0
  71. fabricks/context/_types.py +133 -0
  72. fabricks/context/config/__init__.py +92 -0
  73. fabricks/context/config/utils.py +53 -0
  74. fabricks/context/log.py +77 -0
  75. fabricks/context/runtime.py +117 -0
  76. fabricks/context/secret.py +103 -0
  77. fabricks/context/spark_session.py +82 -0
  78. fabricks/context/utils.py +80 -0
  79. fabricks/core/__init__.py +4 -0
  80. fabricks/core/dags/__init__.py +9 -0
  81. fabricks/core/dags/base.py +99 -0
  82. fabricks/core/dags/generator.py +157 -0
  83. fabricks/core/dags/log.py +12 -0
  84. fabricks/core/dags/processor.py +228 -0
  85. fabricks/core/dags/run.py +39 -0
  86. fabricks/core/dags/terminator.py +25 -0
  87. fabricks/core/dags/utils.py +54 -0
  88. fabricks/core/extenders.py +33 -0
  89. fabricks/core/job_schema.py +32 -0
  90. fabricks/core/jobs/__init__.py +21 -0
  91. fabricks/core/jobs/base/__init__.py +10 -0
  92. fabricks/core/jobs/base/_types.py +284 -0
  93. fabricks/core/jobs/base/checker.py +139 -0
  94. fabricks/core/jobs/base/configurator.py +306 -0
  95. fabricks/core/jobs/base/exception.py +85 -0
  96. fabricks/core/jobs/base/generator.py +447 -0
  97. fabricks/core/jobs/base/invoker.py +206 -0
  98. fabricks/core/jobs/base/job.py +5 -0
  99. fabricks/core/jobs/base/processor.py +249 -0
  100. fabricks/core/jobs/bronze.py +395 -0
  101. fabricks/core/jobs/get_job.py +127 -0
  102. fabricks/core/jobs/get_job_conf.py +152 -0
  103. fabricks/core/jobs/get_job_id.py +31 -0
  104. fabricks/core/jobs/get_jobs.py +107 -0
  105. fabricks/core/jobs/get_schedule.py +10 -0
  106. fabricks/core/jobs/get_schedules.py +32 -0
  107. fabricks/core/jobs/gold.py +415 -0
  108. fabricks/core/jobs/silver.py +373 -0
  109. fabricks/core/masks.py +52 -0
  110. fabricks/core/parsers/__init__.py +12 -0
  111. fabricks/core/parsers/_types.py +6 -0
  112. fabricks/core/parsers/base.py +95 -0
  113. fabricks/core/parsers/decorator.py +11 -0
  114. fabricks/core/parsers/get_parser.py +26 -0
  115. fabricks/core/parsers/utils.py +69 -0
  116. fabricks/core/schedules/__init__.py +14 -0
  117. fabricks/core/schedules/diagrams.py +21 -0
  118. fabricks/core/schedules/generate.py +20 -0
  119. fabricks/core/schedules/get_schedule.py +5 -0
  120. fabricks/core/schedules/get_schedules.py +9 -0
  121. fabricks/core/schedules/process.py +9 -0
  122. fabricks/core/schedules/run.py +3 -0
  123. fabricks/core/schedules/terminate.py +6 -0
  124. fabricks/core/schedules/views.py +61 -0
  125. fabricks/core/steps/__init__.py +4 -0
  126. fabricks/core/steps/_types.py +7 -0
  127. fabricks/core/steps/base.py +423 -0
  128. fabricks/core/steps/get_step.py +10 -0
  129. fabricks/core/steps/get_step_conf.py +26 -0
  130. fabricks/core/udfs.py +106 -0
  131. fabricks/core/views.py +41 -0
  132. fabricks/deploy/__init__.py +92 -0
  133. fabricks/deploy/masks.py +8 -0
  134. fabricks/deploy/notebooks.py +71 -0
  135. fabricks/deploy/schedules.py +10 -0
  136. fabricks/deploy/tables.py +82 -0
  137. fabricks/deploy/udfs.py +19 -0
  138. fabricks/deploy/utils.py +36 -0
  139. fabricks/deploy/views.py +509 -0
  140. fabricks/metastore/README.md +3 -0
  141. fabricks/metastore/__init__.py +5 -0
  142. fabricks/metastore/_types.py +65 -0
  143. fabricks/metastore/database.py +65 -0
  144. fabricks/metastore/dbobject.py +66 -0
  145. fabricks/metastore/pyproject.toml +20 -0
  146. fabricks/metastore/table.py +768 -0
  147. fabricks/metastore/utils.py +51 -0
  148. fabricks/metastore/view.py +53 -0
  149. fabricks/utils/__init__.py +0 -0
  150. fabricks/utils/_types.py +6 -0
  151. fabricks/utils/azure_queue.py +93 -0
  152. fabricks/utils/azure_table.py +154 -0
  153. fabricks/utils/console.py +51 -0
  154. fabricks/utils/fdict.py +240 -0
  155. fabricks/utils/helpers.py +228 -0
  156. fabricks/utils/log.py +236 -0
  157. fabricks/utils/mermaid.py +32 -0
  158. fabricks/utils/path.py +242 -0
  159. fabricks/utils/pip.py +61 -0
  160. fabricks/utils/pydantic.py +94 -0
  161. fabricks/utils/read/__init__.py +11 -0
  162. fabricks/utils/read/_types.py +3 -0
  163. fabricks/utils/read/read.py +305 -0
  164. fabricks/utils/read/read_excel.py +5 -0
  165. fabricks/utils/read/read_yaml.py +33 -0
  166. fabricks/utils/schema/__init__.py +7 -0
  167. fabricks/utils/schema/get_json_schema_for_type.py +161 -0
  168. fabricks/utils/schema/get_schema_for_type.py +99 -0
  169. fabricks/utils/spark.py +76 -0
  170. fabricks/utils/sqlglot.py +56 -0
  171. fabricks/utils/write/__init__.py +8 -0
  172. fabricks/utils/write/delta.py +46 -0
  173. fabricks/utils/write/stream.py +27 -0
  174. fabricks-3.0.11.dist-info/METADATA +23 -0
  175. fabricks-3.0.11.dist-info/RECORD +176 -0
  176. fabricks-3.0.11.dist-info/WHEEL +4 -0
fabricks/core/dags/processor.py
@@ -0,0 +1,228 @@
+ import json
+ import threading
+ import time
+ from multiprocessing import Process
+ from typing import Any, List, Union
+
+ from azure.core.exceptions import AzureError
+ from databricks.sdk.runtime import dbutils, spark
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
+
+ from fabricks.context import PATH_NOTEBOOKS
+ from fabricks.core.dags.base import BaseDags
+ from fabricks.core.dags.log import LOGGER
+ from fabricks.core.dags.run import run
+ from fabricks.core.jobs.base._types import TStep
+ from fabricks.core.steps.get_step import get_step
+ from fabricks.utils.azure_queue import AzureQueue
+ from fabricks.utils.azure_table import AzureTable
+
+
+ class DagProcessor(BaseDags):
+     def __init__(self, schedule_id: str, schedule: str, step: Union[TStep, str], notebook: bool = True):
+         self.step = get_step(step=step)
+         self.schedule = schedule
+
+         self.notebook = notebook
+
+         self._azure_queue = None
+         self._azure_table = None
+
+         super().__init__(schedule_id=schedule_id)
+
+     @property
+     def queue(self) -> AzureQueue:
+         if not self._azure_queue:
+             step = self.remove_invalid_characters(str(self.step))
+             self._azure_queue = AzureQueue(
+                 f"q{step}{self.schedule_id}",
+                 **self.get_connection_info(),  # type: ignore
+             )
+         return self._azure_queue
+
+     @property
+     def table(self) -> AzureTable:
+         if not self._azure_table:
+             self._azure_table = AzureTable(f"t{self.schedule_id}", **self.get_connection_info())  # type: ignore
+         return self._azure_table
+
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=1, min=1, max=10),
+         retry=retry_if_exception_type((Exception, AzureError)),
+         reraise=True,
+     )
+     def query(self, data: Any) -> List[dict]:
+         return self.table.query(data)
+
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=1, min=1, max=10),
+         retry=retry_if_exception_type((Exception, AzureError)),
+         reraise=True,
+     )
+     def upsert(self, data: Any) -> None:
+         self.table.upsert(data)
+
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=1, min=1, max=10),
+         retry=retry_if_exception_type((Exception, AzureError)),
+         reraise=True,
+     )
+     def delete(self, data: Any) -> None:
+         self.table.delete(data)
+
+     def extra(self, d: dict) -> dict:
+         return {
+             "partition_key": self.schedule_id,
+             "schedule": self.schedule,
+             "schedule_id": self.schedule_id,
+             "step": str(self.step),
+             "job": d.get("Job"),
+             "target": "table",
+         }
+
+     def send(self):
+         while True:
+             scheduled = self.get_scheduled()
+             assert isinstance(scheduled, List)
+             if len(scheduled) == 0:
+                 # one sentinel per worker so every receiver thread can stop
+                 for _ in range(self.step.workers):
+                     self.queue.send_sentinel()
+                 LOGGER.info("no more job to schedule", extra={"label": str(self.step)})
+                 break
+
+             else:
+                 sorted_scheduled = sorted(scheduled, key=lambda x: x.get("Rank"))
+                 for s in sorted_scheduled:
+                     dependencies = self.table.query(f"PartitionKey eq 'dependencies' and JobId eq '{s.get('JobId')}'")
+
+                     # only queue a job once all of its dependencies have been released
+                     if len(dependencies) == 0:
+                         s["Status"] = "waiting"
+                         LOGGER.debug("waiting", extra=self.extra(s))
+                         self.table.upsert(s)
+                         self.queue.send(s)
+
+             time.sleep(5)
+
+     def receive(self):
+         while True:
+             response = self.queue.receive()
+             if response == self.queue.sentinel:
+                 LOGGER.info("no more job to process", extra={"label": str(self.step)})
+                 break
+
+             elif response:
+                 j = json.loads(response)
+
+                 j["Status"] = "starting"
+                 self.table.upsert(j)
+                 LOGGER.info("start", extra=self.extra(j))
+
+                 try:
+                     if self.notebook:
+                         dbutils.notebook.run(  # type: ignore
+                             PATH_NOTEBOOKS.joinpath("run").get_notebook_path(),  # type: ignore
+                             self.step.timeouts.job,  # type: ignore
+                             {
+                                 "schedule_id": self.schedule_id,
+                                 "schedule": self.schedule,  # needed to pass schedule variables to the job
+                                 "step": str(self.step),
+                                 "job_id": j.get("JobId"),
+                                 "job": j.get("Job"),
+                             },  # type: ignore
+                         )
+
+                     else:
+                         run(
+                             step=str(self.step),
+                             job_id=j.get("JobId"),
+                             schedule_id=self.schedule_id,
+                             schedule=self.schedule,
+                         )
+
+                 except Exception:
+                     LOGGER.warning("fail", extra={"label": j.get("Job")})
+
+                 finally:
+                     # mark the job as done either way, then release its dependents
+                     j["Status"] = "ok"
+                     self.table.upsert(j)
+                     LOGGER.info("end", extra=self.extra(j))
+
+                     dependencies = self.table.query(f"PartitionKey eq 'dependencies' and ParentId eq '{j.get('JobId')}'")
+                     self.table.delete(dependencies)
+
+     def get_scheduled(self, convert: bool = False):
+         scheduled = self.table.query(f"PartitionKey eq 'statuses' and Status eq 'scheduled' and Step eq '{self.step}'")
+         if convert:
+             return spark.createDataFrame(scheduled)
+         else:
+             return scheduled
+
+     def _process(self):
+         scheduled = self.get_scheduled()
+         assert isinstance(scheduled, List)
+
+         if len(scheduled) > 0:
+             sender = threading.Thread(
+                 target=self.send,
+                 name=f"{str(self.step).capitalize()}Sender",
+                 args=(),
+             )
+             sender.start()
+
+             receivers = []
+             for i in range(self.step.workers):
+                 receiver = threading.Thread(
+                     target=self.receive,
+                     name=f"{str(self.step).capitalize()}Receiver{i}",
+                     args=(),
+                 )
+                 receiver.start()
+                 receivers.append(receiver)
+
+             sender.join()
+             for receiver in receivers:
+                 receiver.join()
+
+     def process(self):
+         scheduled = self.get_scheduled()
+         assert isinstance(scheduled, List)
+
+         if len(scheduled) > 0:
+             LOGGER.info("start", extra={"label": str(self.step)})
+
+             p = Process(target=self._process)  # pass the callable; calling it here would bypass the timeout guard
+             p.start()
+             p.join(timeout=self.step.timeouts.step)
+             p.terminate()
+
+             self.queue.delete()
+
+             if p.exitcode is None:
+                 LOGGER.critical("timeout", extra={"label": str(self.step)})
+                 raise ValueError(f"{self.step} timed out")
+
+             else:
+                 df = self.get_logs(str(self.step))
+                 self.write_logs(df)
+
+                 LOGGER.info("end", extra={"label": str(self.step)})
+
+         else:
+             LOGGER.info("no job to schedule", extra={"label": str(self.step)})
+
+     def __str__(self) -> str:
+         return f"{str(self.step)} ({self.schedule_id})"
+
+     def __enter__(self):
+         return super().__enter__()
+
+     def __exit__(self, *args, **kwargs):
+         if self._azure_queue:
+             self._azure_queue.__exit__()
+         if self._azure_table:
+             self._azure_table.__exit__()
+
+         return super().__exit__(*args, **kwargs)
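For orientation, a minimal driver sketch; the identifiers are placeholders, and it assumes the schedule rows and Azure storage credentials are already in place:

from fabricks.core.dags.processor import DagProcessor

# placeholder values for illustration only
with DagProcessor(schedule_id="20240101", schedule="nightly", step="bronze", notebook=False) as processor:
    processor.process()  # fans jobs out to worker threads; blocks until the step finishes or times out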
fabricks/core/dags/run.py
@@ -0,0 +1,39 @@
+ from typing import Optional
+
+ from fabricks.core.dags.log import LOGGER, TABLE_LOG_HANDLER
+ from fabricks.core.jobs import get_job
+ from fabricks.core.jobs.base.exception import CheckWarning, SkipRunCheckWarning
+
+
+ def run(step: str, job_id: str, schedule_id: str, schedule: str, notebook_id: Optional[str] = None):
+     job = get_job(step=step, job_id=job_id)
+
+     extra = {
+         "partition_key": schedule_id,
+         "schedule_id": schedule_id,
+         "schedule": schedule,
+         "step": step,
+         "job": job,
+         "target": "buffer",
+     }
+     if notebook_id is not None:
+         extra["notebook_id"] = notebook_id
+
+     LOGGER.info("running", extra=extra)
+
+     try:
+         job.run(schedule_id=schedule_id, schedule=schedule)
+         LOGGER.info("done", extra=extra)
+
+     except SkipRunCheckWarning:
+         LOGGER.exception("skipped", extra=extra)
+
+     except CheckWarning:
+         LOGGER.exception("warned", extra=extra)
+
+     except Exception as e:
+         LOGGER.exception("failed", extra=extra)
+         raise e
+
+     finally:
+         TABLE_LOG_HANDLER.flush()
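A single job can also be run directly, as the `run` notebook does; a sketch with placeholder identifiers (in a schedule these come from the queue message):

from fabricks.core.dags.run import run

# placeholder values for illustration only
run(step="silver", job_id="a1b2c3", schedule_id="20240101", schedule="nightly")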
fabricks/core/dags/terminator.py
@@ -0,0 +1,25 @@
+ from fabricks.context import SPARK
+ from fabricks.core.dags.base import BaseDags
+ from fabricks.core.dags.log import LOGGER, TABLE_LOG_HANDLER
+
+
+ class DagTerminator(BaseDags):
+     def __init__(self, schedule_id: str):
+         self.schedule_id = schedule_id
+         super().__init__(schedule_id=schedule_id)
+
+     def terminate(self):
+         df = self.get_logs()
+         self.write_logs(df)
+
+         error_df = SPARK.sql("select * from {df} where status = 'failed'", df=df)
+         for row in error_df.collect():
+             LOGGER.error(f"{row['job']} failed (🔥)")
+
+         TABLE_LOG_HANDLER.table.truncate_partition(self.schedule_id)
+
+         table = self.get_table()
+         table.drop()
+
+         if not error_df.isEmpty():
+             raise ValueError(f"{error_df.count()} job(s) failed")
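At the end of a schedule the terminator persists the buffered logs, surfaces failures, and tears the DAG table down; a sketch with a placeholder id:

from fabricks.core.dags.terminator import DagTerminator

DagTerminator(schedule_id="20240101").terminate()  # raises ValueError if any job ended with status 'failed'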
fabricks/core/dags/utils.py
@@ -0,0 +1,54 @@
+ from typing import Optional, cast
+
+ from fabricks.context import DBUTILS, FABRICKS_STORAGE, FABRICKS_STORAGE_CREDENTIAL, IS_UNITY_CATALOG, SECRET_SCOPE
+ from fabricks.utils.azure_table import AzureTable
+
+
+ def _get_access_key_from_secret_scope(storage_account: str) -> str:
+     from fabricks.context.secret import AccessKey, get_secret_from_secret_scope
+
+     secret = get_secret_from_secret_scope(SECRET_SCOPE, f"{storage_account}-access-key")
+     return cast(AccessKey, secret).key
+
+
+ def _get_access_key_from_os() -> Optional[str]:
+     import os
+
+     return os.environ.get("FABRICKS_ACCESS_KEY")
+
+
+ def get_connection_info(storage_account: str) -> dict:
+     credential = None
+
+     if not IS_UNITY_CATALOG:
+         access_key = _get_access_key_from_secret_scope(storage_account)
+
+     else:
+         access_key = _get_access_key_from_os()
+         if not access_key:
+             access_key = _get_access_key_from_secret_scope(storage_account)
+
+     if FABRICKS_STORAGE_CREDENTIAL:
+         assert DBUTILS
+         credential = DBUTILS.credentials.getServiceCredentialsProvider(FABRICKS_STORAGE_CREDENTIAL)  # type: ignore
+
+     assert credential or access_key
+
+     return {
+         "storage_account": storage_account,
+         "access_key": access_key,
+         "credential": credential,
+     }
+
+
+ def get_table():
+     storage_account = FABRICKS_STORAGE.get_storage_account()
+
+     cx = get_connection_info(storage_account)
+
+     return AzureTable(
+         "dags",
+         storage_account=storage_account,
+         access_key=cx["access_key"],
+         credential=cx["credential"],
+     )
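A sketch of how the shared `dags` table would be obtained and queried; the filter string mirrors the ones used by DagProcessor:

from fabricks.core.dags.utils import get_table

table = get_table()  # AzureTable("dags", ...) on FABRICKS_STORAGE
statuses = table.query("PartitionKey eq 'statuses'")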
fabricks/core/extenders.py
@@ -0,0 +1,33 @@
+ from importlib.util import module_from_spec, spec_from_file_location
+ from typing import Callable
+
+ from fabricks.context import IS_UNITY_CATALOG, PATH_EXTENDERS
+ from fabricks.context.log import DEFAULT_LOGGER
+
+ EXTENDERS: dict[str, Callable] = {}
+
+
+ def get_extender(name: str) -> Callable:
+     path = PATH_EXTENDERS.joinpath(f"{name}.py")
+     if not IS_UNITY_CATALOG:
+         assert path.exists(), f"no valid extender found in {path.string}"
+     else:
+         DEFAULT_LOGGER.debug(f"could not check if extender exists ({path.string})")
+
+     spec = spec_from_file_location(name, path.string)
+     assert spec, f"no valid extender found in {path.string}"
+     assert spec.loader is not None
+
+     # executing the module runs the @extender decorator, which registers the callable
+     mod = module_from_spec(spec)
+     spec.loader.exec_module(mod)
+     e = EXTENDERS[name]
+
+     return e
+
+
+ def extender(name: str):
+     def decorator(fn: Callable):
+         EXTENDERS[name] = fn
+         return fn
+
+     return decorator
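An extender is a Python file under PATH_EXTENDERS that registers a callable at import time; a minimal sketch (the file name and the DataFrame-in/DataFrame-out contract are assumptions, the registry itself accepts any callable):

# PATH_EXTENDERS/my_extender.py (hypothetical)
from fabricks.core.extenders import extender

@extender("my_extender")
def my_extender(df):
    # transform and return the DataFrame
    return df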
fabricks/core/job_schema.py
@@ -0,0 +1,32 @@
+ from dataclasses import dataclass
+ from typing import List
+
+ from fabricks.core.jobs.base._types import JobConf
+ from fabricks.utils.schema import get_json_schema_for_type
+
+
+ def get_job_schema() -> str:
+     import json
+
+     @dataclass
+     class JobWrapper:
+         job: JobConf
+
+     sc = get_json_schema_for_type(List[JobWrapper])
+     defs: dict[str, dict] = sc["$defs"]
+     removals = [("Job", "job_id"), ("Job", "table")]
+
+     for key, defi in defs.items():
+         for ent, prop in removals:
+             if key.startswith(ent) and prop in defi["properties"]:
+                 req: List[str] = defi["required"]
+                 req.remove(prop)  # not defined in yaml
+                 jobprops: dict = defi["properties"]
+                 jobprops.pop(prop)
+
+     j = json.dumps(sc, indent=4)
+     return j
+
+
+ def print_job_schema():
+     print(get_job_schema())
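The returned JSON string can be written out, for example to back editor validation of the job YAML files (the target path below is illustrative):

from fabricks.core.job_schema import get_job_schema

with open("job-schema.json", "w") as f:  # illustrative path
    f.write(get_job_schema())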
fabricks/core/jobs/__init__.py
@@ -0,0 +1,21 @@
+ from fabricks.core.jobs.base import BaseJob, Bronzes, Golds, Silvers, Steps
+ from fabricks.core.jobs.bronze import Bronze
+ from fabricks.core.jobs.get_job import get_job
+ from fabricks.core.jobs.get_job_id import get_job_id
+ from fabricks.core.jobs.get_jobs import get_jobs
+ from fabricks.core.jobs.gold import Gold
+ from fabricks.core.jobs.silver import Silver
+
+ __all__ = [
+     "BaseJob",
+     "Bronze",
+     "Bronzes",
+     "get_job_id",
+     "get_job",
+     "get_jobs",
+     "Gold",
+     "Golds",
+     "Silver",
+     "Silvers",
+     "Steps",
+ ]
fabricks/core/jobs/base/__init__.py
@@ -0,0 +1,10 @@
+ from fabricks.core.jobs.base._types import Bronzes, Golds, Silvers, Steps
+ from fabricks.core.jobs.base.job import BaseJob
+
+ __all__ = [
+     "BaseJob",
+     "Bronzes",
+     "Golds",
+     "Silvers",
+     "Steps",
+ ]