fabricks-3.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. fabricks/__init__.py +0 -0
  2. fabricks/api/__init__.py +11 -0
  3. fabricks/api/cdc/__init__.py +6 -0
  4. fabricks/api/cdc/nocdc.py +3 -0
  5. fabricks/api/cdc/scd1.py +3 -0
  6. fabricks/api/cdc/scd2.py +3 -0
  7. fabricks/api/context.py +27 -0
  8. fabricks/api/core.py +4 -0
  9. fabricks/api/deploy.py +3 -0
  10. fabricks/api/exceptions.py +19 -0
  11. fabricks/api/extenders.py +3 -0
  12. fabricks/api/job_schema.py +3 -0
  13. fabricks/api/log.py +3 -0
  14. fabricks/api/masks.py +3 -0
  15. fabricks/api/metastore/__init__.py +10 -0
  16. fabricks/api/metastore/database.py +3 -0
  17. fabricks/api/metastore/table.py +3 -0
  18. fabricks/api/metastore/view.py +6 -0
  19. fabricks/api/notebooks/__init__.py +0 -0
  20. fabricks/api/notebooks/cluster.py +6 -0
  21. fabricks/api/notebooks/initialize.py +42 -0
  22. fabricks/api/notebooks/process.py +54 -0
  23. fabricks/api/notebooks/run.py +59 -0
  24. fabricks/api/notebooks/schedule.py +75 -0
  25. fabricks/api/notebooks/terminate.py +31 -0
  26. fabricks/api/parsers.py +3 -0
  27. fabricks/api/schedules.py +3 -0
  28. fabricks/api/udfs.py +3 -0
  29. fabricks/api/utils.py +9 -0
  30. fabricks/api/version.py +3 -0
  31. fabricks/api/views.py +6 -0
  32. fabricks/cdc/__init__.py +14 -0
  33. fabricks/cdc/base/__init__.py +4 -0
  34. fabricks/cdc/base/_types.py +10 -0
  35. fabricks/cdc/base/cdc.py +5 -0
  36. fabricks/cdc/base/configurator.py +223 -0
  37. fabricks/cdc/base/generator.py +177 -0
  38. fabricks/cdc/base/merger.py +110 -0
  39. fabricks/cdc/base/processor.py +471 -0
  40. fabricks/cdc/cdc.py +5 -0
  41. fabricks/cdc/nocdc.py +20 -0
  42. fabricks/cdc/scd.py +22 -0
  43. fabricks/cdc/scd1.py +15 -0
  44. fabricks/cdc/scd2.py +15 -0
  45. fabricks/cdc/templates/__init__.py +0 -0
  46. fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
  47. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  48. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  49. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  50. fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
  51. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  52. fabricks/cdc/templates/filter.sql.jinja +4 -0
  53. fabricks/cdc/templates/filters/final.sql.jinja +4 -0
  54. fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
  55. fabricks/cdc/templates/filters/update.sql.jinja +30 -0
  56. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  57. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  58. fabricks/cdc/templates/merge.sql.jinja +3 -0
  59. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  60. fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
  61. fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
  62. fabricks/cdc/templates/queries/__init__.py +0 -0
  63. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  64. fabricks/cdc/templates/queries/final.sql.jinja +1 -0
  65. fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
  66. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
  67. fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
  68. fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
  69. fabricks/cdc/templates/query.sql.jinja +15 -0
  70. fabricks/context/__init__.py +72 -0
  71. fabricks/context/_types.py +133 -0
  72. fabricks/context/config/__init__.py +92 -0
  73. fabricks/context/config/utils.py +53 -0
  74. fabricks/context/log.py +77 -0
  75. fabricks/context/runtime.py +117 -0
  76. fabricks/context/secret.py +103 -0
  77. fabricks/context/spark_session.py +82 -0
  78. fabricks/context/utils.py +80 -0
  79. fabricks/core/__init__.py +4 -0
  80. fabricks/core/dags/__init__.py +9 -0
  81. fabricks/core/dags/base.py +99 -0
  82. fabricks/core/dags/generator.py +157 -0
  83. fabricks/core/dags/log.py +12 -0
  84. fabricks/core/dags/processor.py +228 -0
  85. fabricks/core/dags/run.py +39 -0
  86. fabricks/core/dags/terminator.py +25 -0
  87. fabricks/core/dags/utils.py +54 -0
  88. fabricks/core/extenders.py +33 -0
  89. fabricks/core/job_schema.py +32 -0
  90. fabricks/core/jobs/__init__.py +21 -0
  91. fabricks/core/jobs/base/__init__.py +10 -0
  92. fabricks/core/jobs/base/_types.py +284 -0
  93. fabricks/core/jobs/base/checker.py +139 -0
  94. fabricks/core/jobs/base/configurator.py +306 -0
  95. fabricks/core/jobs/base/exception.py +85 -0
  96. fabricks/core/jobs/base/generator.py +447 -0
  97. fabricks/core/jobs/base/invoker.py +206 -0
  98. fabricks/core/jobs/base/job.py +5 -0
  99. fabricks/core/jobs/base/processor.py +249 -0
  100. fabricks/core/jobs/bronze.py +395 -0
  101. fabricks/core/jobs/get_job.py +127 -0
  102. fabricks/core/jobs/get_job_conf.py +152 -0
  103. fabricks/core/jobs/get_job_id.py +31 -0
  104. fabricks/core/jobs/get_jobs.py +107 -0
  105. fabricks/core/jobs/get_schedule.py +10 -0
  106. fabricks/core/jobs/get_schedules.py +32 -0
  107. fabricks/core/jobs/gold.py +415 -0
  108. fabricks/core/jobs/silver.py +373 -0
  109. fabricks/core/masks.py +52 -0
  110. fabricks/core/parsers/__init__.py +12 -0
  111. fabricks/core/parsers/_types.py +6 -0
  112. fabricks/core/parsers/base.py +95 -0
  113. fabricks/core/parsers/decorator.py +11 -0
  114. fabricks/core/parsers/get_parser.py +26 -0
  115. fabricks/core/parsers/utils.py +69 -0
  116. fabricks/core/schedules/__init__.py +14 -0
  117. fabricks/core/schedules/diagrams.py +21 -0
  118. fabricks/core/schedules/generate.py +20 -0
  119. fabricks/core/schedules/get_schedule.py +5 -0
  120. fabricks/core/schedules/get_schedules.py +9 -0
  121. fabricks/core/schedules/process.py +9 -0
  122. fabricks/core/schedules/run.py +3 -0
  123. fabricks/core/schedules/terminate.py +6 -0
  124. fabricks/core/schedules/views.py +61 -0
  125. fabricks/core/steps/__init__.py +4 -0
  126. fabricks/core/steps/_types.py +7 -0
  127. fabricks/core/steps/base.py +423 -0
  128. fabricks/core/steps/get_step.py +10 -0
  129. fabricks/core/steps/get_step_conf.py +26 -0
  130. fabricks/core/udfs.py +106 -0
  131. fabricks/core/views.py +41 -0
  132. fabricks/deploy/__init__.py +92 -0
  133. fabricks/deploy/masks.py +8 -0
  134. fabricks/deploy/notebooks.py +71 -0
  135. fabricks/deploy/schedules.py +10 -0
  136. fabricks/deploy/tables.py +82 -0
  137. fabricks/deploy/udfs.py +19 -0
  138. fabricks/deploy/utils.py +36 -0
  139. fabricks/deploy/views.py +509 -0
  140. fabricks/metastore/README.md +3 -0
  141. fabricks/metastore/__init__.py +5 -0
  142. fabricks/metastore/_types.py +65 -0
  143. fabricks/metastore/database.py +65 -0
  144. fabricks/metastore/dbobject.py +66 -0
  145. fabricks/metastore/pyproject.toml +20 -0
  146. fabricks/metastore/table.py +768 -0
  147. fabricks/metastore/utils.py +51 -0
  148. fabricks/metastore/view.py +53 -0
  149. fabricks/utils/__init__.py +0 -0
  150. fabricks/utils/_types.py +6 -0
  151. fabricks/utils/azure_queue.py +93 -0
  152. fabricks/utils/azure_table.py +154 -0
  153. fabricks/utils/console.py +51 -0
  154. fabricks/utils/fdict.py +240 -0
  155. fabricks/utils/helpers.py +228 -0
  156. fabricks/utils/log.py +236 -0
  157. fabricks/utils/mermaid.py +32 -0
  158. fabricks/utils/path.py +242 -0
  159. fabricks/utils/pip.py +61 -0
  160. fabricks/utils/pydantic.py +94 -0
  161. fabricks/utils/read/__init__.py +11 -0
  162. fabricks/utils/read/_types.py +3 -0
  163. fabricks/utils/read/read.py +305 -0
  164. fabricks/utils/read/read_excel.py +5 -0
  165. fabricks/utils/read/read_yaml.py +33 -0
  166. fabricks/utils/schema/__init__.py +7 -0
  167. fabricks/utils/schema/get_json_schema_for_type.py +161 -0
  168. fabricks/utils/schema/get_schema_for_type.py +99 -0
  169. fabricks/utils/spark.py +76 -0
  170. fabricks/utils/sqlglot.py +56 -0
  171. fabricks/utils/write/__init__.py +8 -0
  172. fabricks/utils/write/delta.py +46 -0
  173. fabricks/utils/write/stream.py +27 -0
  174. fabricks-3.0.11.dist-info/METADATA +23 -0
  175. fabricks-3.0.11.dist-info/RECORD +176 -0
  176. fabricks-3.0.11.dist-info/WHEEL +4 -0
fabricks/core/jobs/get_job.py
@@ -0,0 +1,127 @@
+ from typing import Optional, Union, cast, overload
+
+ from pyspark.sql.types import Row
+
+ from fabricks.core.jobs.base._types import Bronzes, Golds, Silvers, TBronze, TGold, TSilver
+ from fabricks.core.jobs.base.job import BaseJob
+ from fabricks.core.jobs.get_job_id import get_job_id
+
+
+ @overload
+ def get_job(*, step: str, job_id: str) -> BaseJob: ...
+
+
+ @overload
+ def get_job(*, step: str, topic: str, item: str) -> BaseJob: ...
+
+
+ @overload
+ def get_job(*, row: Row) -> BaseJob: ...
+
+
+ @overload
+ def get_job(job: str) -> BaseJob: ...
+
+
+ def get_job(
+     job: Optional[str] = None,
+     step: Optional[str] = None,
+     topic: Optional[str] = None,
+     item: Optional[str] = None,
+     job_id: Optional[str] = None,
+     row: Optional[Row] = None,
+ ) -> BaseJob:
+     """
+     Retrieve a job based on the provided parameters.
+
+     Args:
+         step (Optional[str]): The step of the job.
+         topic (Optional[str]): The topic of the job.
+         item (Optional[str]): The item of the job.
+         job_id (Optional[str]): The ID of the job.
+         job (Optional[str]): The job string.
+         row (Optional[Row]): The row object containing job information.
+
+     Returns:
+         BaseJob: The retrieved job.
+
+     Raises:
+         ValueError: If the required parameters are not provided.
+
+     """
+     if row:
+         if "step" in row and "topic" in row and "item" in row:
+             j = get_job_internal(step=row.step, topic=row.topic, item=row.item)
+         elif "step" in row and "job_id" in row:
+             j = get_job(step=row.step, job_id=row.job_id)
+         elif "job" in row:
+             parts = row.job.split(".")
+             s = parts[0]
+             job_id = get_job_id(job=row.job)
+             j = get_job_internal(step=s, job_id=job_id)
+         else:
+             raise ValueError("step, topic, item or step, job_id or job mandatory")
+
+     elif job:
+         parts = job.split(".")
+         s = parts[0]
+         job_id = get_job_id(job=job)
+         j = get_job_internal(step=s, job_id=job_id)
+
+     elif job_id:
+         assert step, "step mandatory"
+         j = get_job_internal(step=step, job_id=job_id)
+
+     else:
+         assert step, "step mandatory"
+         assert topic, "topic mandatory"
+         assert item, "item mandatory"
+         j = get_job_internal(step=step, topic=topic, item=item)
+
+     return j
+
+
+ def get_job_internal(
+     step: str,
+     topic: Optional[str] = None,
+     item: Optional[str] = None,
+     job_id: Optional[str] = None,
+     conf: Optional[Union[dict, Row]] = None,
+ ):
+     if step in Bronzes:
+         from fabricks.core.jobs.bronze import Bronze
+
+         step = cast(TBronze, step)
+         if job_id is not None:
+             job = Bronze.from_job_id(step=step, job_id=job_id, conf=conf)
+         else:
+             assert topic
+             assert item
+             job = Bronze.from_step_topic_item(step=step, topic=topic, item=item, conf=conf)
+
+     elif step in Silvers:
+         from fabricks.core.jobs.silver import Silver
+
+         step = cast(TSilver, step)
+         if job_id is not None:
+             job = Silver.from_job_id(step=step, job_id=job_id, conf=conf)
+         else:
+             assert topic
+             assert item
+             job = Silver.from_step_topic_item(step=step, topic=topic, item=item, conf=conf)
+
+     elif step in Golds:
+         from fabricks.core.jobs.gold import Gold
+
+         step = cast(TGold, step)
+         if job_id is not None:
+             job = Gold.from_job_id(step=step, job_id=job_id, conf=conf)
+         else:
+             assert topic
+             assert item
+             job = Gold.from_step_topic_item(step=step, topic=topic, item=item, conf=conf)
+
+     else:
+         raise ValueError(f"{step} not found")
+
+     return job
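
For orientation, a minimal usage sketch of the get_job overloads defined above in fabricks/core/jobs/get_job.py; the step, topic and item values are hypothetical placeholders and assume a configured Fabricks runtime:

    from fabricks.core.jobs.get_job import get_job
    from fabricks.core.jobs.get_job_id import get_job_id

    # three equivalent ways to resolve the same job; "bronze", "sales" and
    # "orders" are placeholder values, not names shipped with the package
    job = get_job("bronze.sales_orders")                        # qualified name "<step>.<topic>_<item>"
    job = get_job(step="bronze", topic="sales", item="orders")  # step / topic / item
    job = get_job(step="bronze", job_id=get_job_id(step="bronze", topic="sales", item="orders"))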
fabricks/core/jobs/get_job_conf.py
@@ -0,0 +1,152 @@
+ from typing import Optional, Union, cast, overload
+
+ from pyspark.sql.types import Row
+
+ from fabricks.context import IS_JOB_CONFIG_FROM_YAML, SPARK
+ from fabricks.core.jobs.base._types import Bronzes, Golds, JobConf, Silvers, TBronze, TGold, TSilver, TStep
+ from fabricks.core.jobs.get_job_id import get_job_id
+
+
+ def get_job_conf_internal(step: TStep, row: Union[Row, dict]) -> JobConf:
+     if isinstance(row, Row):
+         row = row.asDict(recursive=True)
+
+     options = row.get("options")
+     table_options = row.get("table_options")
+     check_options = row.get("check_options")
+     spark_options = row.get("spark_options")
+     invoker_options = row.get("invoker_options")
+     extender_options = row.get("extender_options")
+
+     job_id = row.get("job_id", get_job_id(step=step, topic=row["topic"], item=row["item"]))
+
+     if step in Bronzes:
+         from fabricks.core.jobs.base._types import JobConfBronze
+
+         assert options is not None, "no option"
+         step = cast(TBronze, step)
+         return JobConfBronze(
+             job_id=job_id,
+             topic=row["topic"],
+             item=row["item"],
+             step=step,
+             options=options,
+             parser_options=row.get("parser_options"),
+             table_options=table_options,
+             check_options=check_options,
+             invoker_options=invoker_options,
+             extender_options=extender_options,
+             spark_options=spark_options,
+             tags=row.get("tags"),
+         )
+
+     elif step in Silvers:
+         from fabricks.core.jobs.base._types import JobConfSilver
+
+         assert options is not None, "no option"
+         step = cast(TSilver, step)
+         return JobConfSilver(
+             job_id=job_id,
+             topic=row["topic"],
+             item=row["item"],
+             step=step,
+             options=options,
+             table_options=table_options,
+             check_options=check_options,
+             invoker_options=invoker_options,
+             extender_options=extender_options,
+             spark_options=spark_options,
+             tags=row.get("tags"),
+         )
+
+     elif step in Golds:
+         from fabricks.core.jobs.base._types import JobConfGold
+
+         assert options is not None, "no option"
+         step = cast(TGold, step)
+         return JobConfGold(
+             job_id=job_id,
+             topic=row["topic"],
+             item=row["item"],
+             step=step,
+             options=options,
+             table_options=table_options,
+             check_options=check_options,
+             invoker_options=invoker_options,
+             extender_options=extender_options,
+             spark_options=spark_options,
+             tags=row.get("tags"),
+         )
+
+     else:
+         raise ValueError(f"{step} not found")
+
+
+ @overload
+ def get_job_conf(step: TStep, *, job_id: str, row: Optional[Union[Row, dict]] = None) -> JobConf: ...
+
+
+ @overload
+ def get_job_conf(step: TStep, *, topic: str, item: str, row: Optional[Union[Row, dict]] = None) -> JobConf: ...
+
+
+ def get_job_conf(
+     step: TStep,
+     job_id: Optional[str] = None,
+     topic: Optional[str] = None,
+     item: Optional[str] = None,
+     row: Optional[Union[Row, dict]] = None,
+ ) -> JobConf:
+     if row:
+         return get_job_conf_internal(step=step, row=row)
+
+     if IS_JOB_CONFIG_FROM_YAML:
+         from fabricks.core.steps import get_step
+
+         s = get_step(step=step)
+         if topic:
+             iter = s.get_jobs_iter(topic=topic)
+         else:
+             iter = s.get_jobs_iter()
+
+         if job_id:
+             conf = next(
+                 (
+                     i
+                     for i in iter
+                     if i.get("job_id", get_job_id(step=i["step"], topic=i["topic"], item=i["item"])) == job_id
+                 ),
+                 None,
+             )
+             if not conf:
+                 raise ValueError(f"job not found ({step}, {job_id})")
+
+             return get_job_conf_internal(step=step, row=conf)
+
+         elif topic and item:
+             conf = next(
+                 (i for i in iter if i.get("topic") == topic and i.get("item") == item),
+                 None,
+             )
+             if not conf:
+                 raise ValueError(f"job not found ({step}, {topic}, {item})")
+
+             return get_job_conf_internal(step=step, row=conf)
+
+     else:
+         df = SPARK.sql(f"select * from fabricks.{step}_jobs")
+
+         assert df, f"{step} not found"
+
+         if job_id:
+             try:
+                 row = df.where(f"job_id == '{job_id}'").collect()[0]
+             except IndexError:
+                 raise ValueError(f"job not found ({step}, {job_id})")
+         else:
+             try:
+                 row = df.where(f"topic == '{topic}' and item == '{item}'").collect()[0]
+             except IndexError:
+                 raise ValueError(f"job not found ({step}, {topic}, {item})")
+
+         return get_job_conf_internal(step=step, row=row)
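
A hedged usage sketch for get_job_conf from fabricks/core/jobs/get_job_conf.py; the step, topic and item values are hypothetical. Resolution goes through the runtime YAML when IS_JOB_CONFIG_FROM_YAML is set, otherwise through the fabricks.<step>_jobs table:

    from fabricks.core.jobs.get_job_conf import get_job_conf
    from fabricks.core.jobs.get_job_id import get_job_id

    # both calls resolve the same configuration; "gold", "sales" and "orders"
    # are placeholder values
    conf = get_job_conf("gold", topic="sales", item="orders")
    conf = get_job_conf("gold", job_id=get_job_id(step="gold", topic="sales", item="orders"))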
fabricks/core/jobs/get_job_id.py
@@ -0,0 +1,31 @@
+ from typing import Optional, overload
+
+ from fabricks.utils.helpers import md5
+
+
+ @overload
+ def get_job_id(step: str, topic: str, item: str) -> str: ...
+
+
+ @overload
+ def get_job_id(*, job: str) -> str: ...
+
+
+ def get_job_id(
+     step: Optional[str] = None,
+     topic: Optional[str] = None,
+     item: Optional[str] = None,
+     job: Optional[str] = None,
+ ) -> str:
+     if not job:
+         assert step
+         assert topic
+         assert item
+         job = f"{step}.{topic}_{item}"
+
+     return md5(job)
+
+
+ def get_dependency_id(parent: str, job_id: str) -> str:
+     base = f"{job_id}*{parent}"
+     return md5(base)
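
The job id is derived from the composed name "<step>.<topic>_<item>". A small sketch, assuming the md5 helper from fabricks.utils.helpers returns the hexadecimal MD5 digest of that string (which is what the Spark expression md5(concat(step,'.',topic,'_',item)) in get_jobs.py computes); the values are placeholders:

    import hashlib

    from fabricks.core.jobs.get_job_id import get_job_id

    # "bronze", "sales" and "orders" are placeholder values
    job_id = get_job_id(step="bronze", topic="sales", item="orders")

    # under the assumption above, this mirrors md5(concat(step, '.', topic, '_', item))
    print(job_id)
    print(hashlib.md5("bronze.sales_orders".encode()).hexdigest())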
fabricks/core/jobs/get_jobs.py
@@ -0,0 +1,107 @@
+ from dataclasses import dataclass
+ from typing import List, Literal, Optional, TypedDict, Union, overload
+
+ from pyspark.sql import DataFrame
+ from pyspark.sql.functions import expr
+ from pyspark.sql.types import Row
+
+ from fabricks.context import IS_JOB_CONFIG_FROM_YAML, PATHS_RUNTIME, SPARK
+ from fabricks.core.jobs.base._types import AllowedModes, TStep
+ from fabricks.core.jobs.base.job import BaseJob
+ from fabricks.core.jobs.get_job import get_job, get_job_internal
+ from fabricks.utils.helpers import concat_dfs, run_in_parallel
+ from fabricks.utils.path import Path
+ from fabricks.utils.read import read_yaml
+ from fabricks.utils.schema import get_schema_for_type
+
+
+ class GenericOptions(TypedDict):
+     mode: AllowedModes
+
+
+ @dataclass
+ class JobConfGeneric:
+     step: TStep
+     job_id: str
+     topic: str
+     item: str
+     options: GenericOptions
+
+
+ def _get_job(row: Row):
+     return get_job(row=row)
+
+
+ def get_jobs_internal():
+     for p in PATHS_RUNTIME.values():
+         yield from read_yaml(p, root="job")
+
+
+ def get_jobs_internal_df() -> DataFrame:
+     if IS_JOB_CONFIG_FROM_YAML:
+         schema = get_schema_for_type(JobConfGeneric)
+
+         def _read_yaml(path: Path):
+             df = SPARK.createDataFrame(read_yaml(path, root="job"), schema=schema)  # type: ignore
+             if df:
+                 df = df.withColumn("job_id", expr("md5(concat(step,'.',topic,'_',item))"))
+             return df
+
+         dfs = run_in_parallel(_read_yaml, list(PATHS_RUNTIME.values()))
+         df = concat_dfs(dfs)
+         assert df is not None
+
+     else:
+         df = SPARK.sql("select * from fabricks.jobs")
+
+     return df
+
+
+ @overload
+ def get_jobs(df: Optional[DataFrame] = None, *, convert: Literal[True]) -> List[BaseJob]: ...
+
+
+ @overload
+ def get_jobs(df: Optional[DataFrame] = None, *, convert: Literal[False]) -> DataFrame: ...
+
+
+ def get_jobs(df: Optional[DataFrame] = None, convert: Optional[bool] = False) -> Union[List[BaseJob], DataFrame]:
+     """
+     Retrieves a list of jobs or a DataFrame containing job information.
+
+     Args:
+         df (Optional[DataFrame]): Optional DataFrame containing job information.
+         convert (Optional[bool]): Flag indicating whether to convert the DataFrame to a list of jobs.
+
+     Returns:
+         Union[List[BaseJob], DataFrame]: If `convert` is False, returns a DataFrame containing job information.
+             If `convert` is True, returns a list of BaseJob objects.
+
+     Raises:
+         ValueError: If the DataFrame does not contain the required columns.
+
+     """
+     if not convert:
+         return get_jobs_internal_df()
+
+     else:
+         if df is None:
+             return list(
+                 get_job_internal(j["step"], j["topic"], j["item"], j.get("job_id"), conf=j)
+                 for j in get_jobs_internal()
+             )
+
+         else:
+             if "step" in df.columns and "topic" in df.columns and "item" in df.columns:
+                 df = df.select("step", "topic", "item")
+             elif "step" in df.columns and "job_id" in df.columns:
+                 df = df.select("step", "job_id")
+             elif "job" in df.columns:
+                 df = df.select("job")
+             else:
+                 raise ValueError("step, topic, item or step, job_id or job mandatory")
+
+             assert df
+
+             jobs = run_in_parallel(_get_job, df)
+             return jobs
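
As the overloads above indicate, get_jobs returns either a DataFrame of job configurations or a list of BaseJob objects. A brief usage sketch, assuming a populated runtime configuration (column names follow the JobConfGeneric schema above):

    from fabricks.core.jobs.get_jobs import get_jobs

    # convert=False (the default): one row per configured job
    df = get_jobs(convert=False)
    df.select("step", "topic", "item").show()

    # convert=True: materialize BaseJob instances (read from the runtime YAML
    # when no DataFrame is passed in)
    jobs = get_jobs(convert=True)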
fabricks/core/jobs/get_schedule.py
@@ -0,0 +1,10 @@
+ from typing import Dict
+
+ from fabricks.core.jobs.get_schedules import get_schedules
+
+
+ def get_schedule(name: str) -> Dict:
+     schedule = next(s for s in get_schedules() if s.get("name") == name)
+
+     assert schedule, "schedule not found"
+     return schedule
fabricks/core/jobs/get_schedules.py
@@ -0,0 +1,32 @@
+ from typing import List, Optional, TypedDict
+
+ from pyspark.sql import DataFrame
+
+ from fabricks.context import PATH_SCHEDULES, SPARK
+ from fabricks.core.jobs.base._types import TStep
+ from fabricks.utils.read.read_yaml import read_yaml
+ from fabricks.utils.schema import get_schema_for_type
+
+
+ class Options(TypedDict):
+     steps: Optional[List[TStep]]
+     tag: Optional[str]
+     view: Optional[str]
+     variables: Optional[dict[str, str]]
+
+
+ class Schedule(TypedDict):
+     name: str
+     options: Options
+
+
+ def get_schedules():
+     return read_yaml(PATH_SCHEDULES, root="schedule")
+
+
+ def get_schedules_df() -> DataFrame:
+     schema = get_schema_for_type(Schedule)
+     df = SPARK.createDataFrame(list(get_schedules()), schema=schema)  # type: ignore
+
+     assert df, "no schedules found"
+     return df
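
For reference, a schedule entry read by get_schedules (root "schedule" in the YAML under PATH_SCHEDULES) is expected to match the Schedule/Options TypedDicts above; the names and values below are hypothetical placeholders:

    from fabricks.core.jobs.get_schedules import Schedule

    # a dict conforming to the Schedule shape; step names and variables are placeholders
    nightly: Schedule = {
        "name": "nightly",
        "options": {
            "steps": ["bronze", "silver", "gold"],
            "tag": None,
            "view": None,
            "variables": {"environment": "prod"},
        },
    }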