fabricks-3.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. fabricks/__init__.py +0 -0
  2. fabricks/api/__init__.py +11 -0
  3. fabricks/api/cdc/__init__.py +6 -0
  4. fabricks/api/cdc/nocdc.py +3 -0
  5. fabricks/api/cdc/scd1.py +3 -0
  6. fabricks/api/cdc/scd2.py +3 -0
  7. fabricks/api/context.py +27 -0
  8. fabricks/api/core.py +4 -0
  9. fabricks/api/deploy.py +3 -0
  10. fabricks/api/exceptions.py +19 -0
  11. fabricks/api/extenders.py +3 -0
  12. fabricks/api/job_schema.py +3 -0
  13. fabricks/api/log.py +3 -0
  14. fabricks/api/masks.py +3 -0
  15. fabricks/api/metastore/__init__.py +10 -0
  16. fabricks/api/metastore/database.py +3 -0
  17. fabricks/api/metastore/table.py +3 -0
  18. fabricks/api/metastore/view.py +6 -0
  19. fabricks/api/notebooks/__init__.py +0 -0
  20. fabricks/api/notebooks/cluster.py +6 -0
  21. fabricks/api/notebooks/initialize.py +42 -0
  22. fabricks/api/notebooks/process.py +54 -0
  23. fabricks/api/notebooks/run.py +59 -0
  24. fabricks/api/notebooks/schedule.py +75 -0
  25. fabricks/api/notebooks/terminate.py +31 -0
  26. fabricks/api/parsers.py +3 -0
  27. fabricks/api/schedules.py +3 -0
  28. fabricks/api/udfs.py +3 -0
  29. fabricks/api/utils.py +9 -0
  30. fabricks/api/version.py +3 -0
  31. fabricks/api/views.py +6 -0
  32. fabricks/cdc/__init__.py +14 -0
  33. fabricks/cdc/base/__init__.py +4 -0
  34. fabricks/cdc/base/_types.py +10 -0
  35. fabricks/cdc/base/cdc.py +5 -0
  36. fabricks/cdc/base/configurator.py +223 -0
  37. fabricks/cdc/base/generator.py +177 -0
  38. fabricks/cdc/base/merger.py +110 -0
  39. fabricks/cdc/base/processor.py +471 -0
  40. fabricks/cdc/cdc.py +5 -0
  41. fabricks/cdc/nocdc.py +20 -0
  42. fabricks/cdc/scd.py +22 -0
  43. fabricks/cdc/scd1.py +15 -0
  44. fabricks/cdc/scd2.py +15 -0
  45. fabricks/cdc/templates/__init__.py +0 -0
  46. fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
  47. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  48. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  49. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  50. fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
  51. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  52. fabricks/cdc/templates/filter.sql.jinja +4 -0
  53. fabricks/cdc/templates/filters/final.sql.jinja +4 -0
  54. fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
  55. fabricks/cdc/templates/filters/update.sql.jinja +30 -0
  56. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  57. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  58. fabricks/cdc/templates/merge.sql.jinja +3 -0
  59. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  60. fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
  61. fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
  62. fabricks/cdc/templates/queries/__init__.py +0 -0
  63. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  64. fabricks/cdc/templates/queries/final.sql.jinja +1 -0
  65. fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
  66. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
  67. fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
  68. fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
  69. fabricks/cdc/templates/query.sql.jinja +15 -0
  70. fabricks/context/__init__.py +72 -0
  71. fabricks/context/_types.py +133 -0
  72. fabricks/context/config/__init__.py +92 -0
  73. fabricks/context/config/utils.py +53 -0
  74. fabricks/context/log.py +77 -0
  75. fabricks/context/runtime.py +117 -0
  76. fabricks/context/secret.py +103 -0
  77. fabricks/context/spark_session.py +82 -0
  78. fabricks/context/utils.py +80 -0
  79. fabricks/core/__init__.py +4 -0
  80. fabricks/core/dags/__init__.py +9 -0
  81. fabricks/core/dags/base.py +99 -0
  82. fabricks/core/dags/generator.py +157 -0
  83. fabricks/core/dags/log.py +12 -0
  84. fabricks/core/dags/processor.py +228 -0
  85. fabricks/core/dags/run.py +39 -0
  86. fabricks/core/dags/terminator.py +25 -0
  87. fabricks/core/dags/utils.py +54 -0
  88. fabricks/core/extenders.py +33 -0
  89. fabricks/core/job_schema.py +32 -0
  90. fabricks/core/jobs/__init__.py +21 -0
  91. fabricks/core/jobs/base/__init__.py +10 -0
  92. fabricks/core/jobs/base/_types.py +284 -0
  93. fabricks/core/jobs/base/checker.py +139 -0
  94. fabricks/core/jobs/base/configurator.py +306 -0
  95. fabricks/core/jobs/base/exception.py +85 -0
  96. fabricks/core/jobs/base/generator.py +447 -0
  97. fabricks/core/jobs/base/invoker.py +206 -0
  98. fabricks/core/jobs/base/job.py +5 -0
  99. fabricks/core/jobs/base/processor.py +249 -0
  100. fabricks/core/jobs/bronze.py +395 -0
  101. fabricks/core/jobs/get_job.py +127 -0
  102. fabricks/core/jobs/get_job_conf.py +152 -0
  103. fabricks/core/jobs/get_job_id.py +31 -0
  104. fabricks/core/jobs/get_jobs.py +107 -0
  105. fabricks/core/jobs/get_schedule.py +10 -0
  106. fabricks/core/jobs/get_schedules.py +32 -0
  107. fabricks/core/jobs/gold.py +415 -0
  108. fabricks/core/jobs/silver.py +373 -0
  109. fabricks/core/masks.py +52 -0
  110. fabricks/core/parsers/__init__.py +12 -0
  111. fabricks/core/parsers/_types.py +6 -0
  112. fabricks/core/parsers/base.py +95 -0
  113. fabricks/core/parsers/decorator.py +11 -0
  114. fabricks/core/parsers/get_parser.py +26 -0
  115. fabricks/core/parsers/utils.py +69 -0
  116. fabricks/core/schedules/__init__.py +14 -0
  117. fabricks/core/schedules/diagrams.py +21 -0
  118. fabricks/core/schedules/generate.py +20 -0
  119. fabricks/core/schedules/get_schedule.py +5 -0
  120. fabricks/core/schedules/get_schedules.py +9 -0
  121. fabricks/core/schedules/process.py +9 -0
  122. fabricks/core/schedules/run.py +3 -0
  123. fabricks/core/schedules/terminate.py +6 -0
  124. fabricks/core/schedules/views.py +61 -0
  125. fabricks/core/steps/__init__.py +4 -0
  126. fabricks/core/steps/_types.py +7 -0
  127. fabricks/core/steps/base.py +423 -0
  128. fabricks/core/steps/get_step.py +10 -0
  129. fabricks/core/steps/get_step_conf.py +26 -0
  130. fabricks/core/udfs.py +106 -0
  131. fabricks/core/views.py +41 -0
  132. fabricks/deploy/__init__.py +92 -0
  133. fabricks/deploy/masks.py +8 -0
  134. fabricks/deploy/notebooks.py +71 -0
  135. fabricks/deploy/schedules.py +10 -0
  136. fabricks/deploy/tables.py +82 -0
  137. fabricks/deploy/udfs.py +19 -0
  138. fabricks/deploy/utils.py +36 -0
  139. fabricks/deploy/views.py +509 -0
  140. fabricks/metastore/README.md +3 -0
  141. fabricks/metastore/__init__.py +5 -0
  142. fabricks/metastore/_types.py +65 -0
  143. fabricks/metastore/database.py +65 -0
  144. fabricks/metastore/dbobject.py +66 -0
  145. fabricks/metastore/pyproject.toml +20 -0
  146. fabricks/metastore/table.py +768 -0
  147. fabricks/metastore/utils.py +51 -0
  148. fabricks/metastore/view.py +53 -0
  149. fabricks/utils/__init__.py +0 -0
  150. fabricks/utils/_types.py +6 -0
  151. fabricks/utils/azure_queue.py +93 -0
  152. fabricks/utils/azure_table.py +154 -0
  153. fabricks/utils/console.py +51 -0
  154. fabricks/utils/fdict.py +240 -0
  155. fabricks/utils/helpers.py +228 -0
  156. fabricks/utils/log.py +236 -0
  157. fabricks/utils/mermaid.py +32 -0
  158. fabricks/utils/path.py +242 -0
  159. fabricks/utils/pip.py +61 -0
  160. fabricks/utils/pydantic.py +94 -0
  161. fabricks/utils/read/__init__.py +11 -0
  162. fabricks/utils/read/_types.py +3 -0
  163. fabricks/utils/read/read.py +305 -0
  164. fabricks/utils/read/read_excel.py +5 -0
  165. fabricks/utils/read/read_yaml.py +33 -0
  166. fabricks/utils/schema/__init__.py +7 -0
  167. fabricks/utils/schema/get_json_schema_for_type.py +161 -0
  168. fabricks/utils/schema/get_schema_for_type.py +99 -0
  169. fabricks/utils/spark.py +76 -0
  170. fabricks/utils/sqlglot.py +56 -0
  171. fabricks/utils/write/__init__.py +8 -0
  172. fabricks/utils/write/delta.py +46 -0
  173. fabricks/utils/write/stream.py +27 -0
  174. fabricks-3.0.11.dist-info/METADATA +23 -0
  175. fabricks-3.0.11.dist-info/RECORD +176 -0
  176. fabricks-3.0.11.dist-info/WHEEL +4 -0
fabricks/core/jobs/base/configurator.py
@@ -0,0 +1,306 @@
+ from abc import ABC, abstractmethod
+ from functools import lru_cache
+ from typing import Optional, Union, cast
+
+ from pyspark.sql import DataFrame, SparkSession
+ from pyspark.sql.types import Row
+ from typing_extensions import deprecated
+
+ from fabricks.cdc import SCD1, SCD2, AllowedChangeDataCaptures, NoCDC
+ from fabricks.context import CONF_RUNTIME, PATHS_RUNTIME, PATHS_STORAGE, STEPS
+ from fabricks.context.log import DEFAULT_LOGGER
+ from fabricks.context.spark_session import build_spark_session
+ from fabricks.core.jobs.base._types import AllowedModes, Options, Paths, TStep
+ from fabricks.core.jobs.get_job_conf import get_job_conf
+ from fabricks.core.jobs.get_job_id import get_job_id
+ from fabricks.metastore.table import Table
+ from fabricks.utils.fdict import FDict
+ from fabricks.utils.path import Path
+
+
+ class Configurator(ABC):
+     def __init__(
+         self,
+         expand: str,
+         step: TStep,
+         topic: Optional[str] = None,
+         item: Optional[str] = None,
+         job_id: Optional[str] = None,
+         conf: Optional[Union[dict, Row]] = None,
+     ):
+         self.expand = expand
+         self.step: TStep = step
+
+         if job_id is not None:
+             self.job_id = job_id
+             self.conf = get_job_conf(step=self.step, job_id=self.job_id, row=conf)
+             self.topic = self.conf.topic
+             self.item = self.conf.item
+         else:
+             assert topic
+             assert item
+             self.topic = topic
+             self.item = item
+             self.conf = get_job_conf(step=self.step, topic=self.topic, item=self.item, row=conf)
+             self.job_id = get_job_id(step=self.step, topic=self.topic, item=self.item)
+
+     _step_conf: Optional[dict[str, str]] = None
+     _spark: Optional[SparkSession] = None
+     _timeout: Optional[int] = None
+     _options: Optional[Options] = None
+     _paths: Optional[Paths] = None
+     _table: Optional[Table] = None
+     _root: Optional[Path] = None
+
+     _cdc: Optional[Union[NoCDC, SCD1, SCD2]] = None
+     _change_data_capture: Optional[AllowedChangeDataCaptures] = None
+     _mode: Optional[AllowedModes] = None
+
+     @property
+     @abstractmethod
+     def stream(self) -> bool: ...
+
+     @property
+     @abstractmethod
+     def schema_drift(self) -> bool: ...
+
+     @property
+     @abstractmethod
+     def persist(self) -> bool: ...
+
+     @property
+     @abstractmethod
+     def virtual(self) -> bool: ...
+
+     @classmethod
+     def from_step_topic_item(cls, step: str, topic: str, item: str): ...
+
+     @classmethod
+     def from_job_id(cls, step: str, job_id: str): ...
+
+     @property
+     def spark(self) -> SparkSession:
+         if not self._spark:
+             spark = build_spark_session(app_name=str(self))
+
+             step_options = self.step_conf.get("spark_options", {})
+             step_sql_options = step_options.get("sql", {})
+             step_conf_options = step_options.get("conf", {})
+             if step_sql_options:
+                 for key, value in step_sql_options.items():
+                     DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self.step})
+                     spark.sql(f"set {key} = {value}")
+             if step_conf_options:
+                 for key, value in step_conf_options.items():
+                     DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self.step})
+                     spark.conf.set(f"{key}", f"{value}")
+
+             job_sql_options = self.options.spark.get_dict("sql")
+             job_conf_options = self.options.spark.get_dict("conf")
+             if job_sql_options:
+                 for key, value in job_sql_options.items():
+                     DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self})
+                     spark.sql(f"set {key} = {value}")
+             if job_conf_options:
+                 for key, value in job_conf_options.items():
+                     DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self})
+                     spark.conf.set(f"{key}", f"{value}")
+
+             self._spark = spark
+         return self._spark
+
+     @property
+     def step_conf(self) -> dict:
+         if not self._step_conf:
+             _conf = [s for s in STEPS if s.get("name") == self.step][0]
+             assert _conf is not None
+             self._step_conf = cast(dict[str, str], _conf)
+         return self._step_conf
+
+     @property
+     def qualified_name(self) -> str:
+         return f"{self.step}.{self.topic}_{self.item}"
+
+     def _get_timeout(self, what: str) -> int:
+         t = self.step_conf.get("options", {}).get("timeouts", {}).get(what, None)
+         if t is None:
+             t = CONF_RUNTIME.get("options", {}).get("timeouts", {}).get(what)
+         assert t is not None
+         return t
+
+     @property
+     def timeout(self) -> int:
+         if not self._timeout:
+             t = self.options.job.get("timeout")
+
+             if t is None:
+                 t = self._get_timeout("job")
+
+             assert t is not None
+             self._timeout = int(t)
+
+         return self._timeout
+
+     def pip(self):
+         pass
+
+     @property
+     def table(self) -> Table:
+         return self.cdc.table
+
+     @property
+     def paths(self) -> Paths:
+         if not self._paths:
+             storage = PATHS_STORAGE.get(self.step)
+             assert storage
+
+             runtime_root = PATHS_RUNTIME.get(self.step)
+             assert runtime_root
+
+             self._paths = Paths(
+                 storage=storage,
+                 tmp=storage.joinpath("tmp", self.topic, self.item),
+                 checkpoints=storage.joinpath("checkpoints", self.topic, self.item),
+                 commits=storage.joinpath("checkpoints", self.topic, self.item, "commits"),
+                 schema=storage.joinpath("schema", self.topic, self.item),
+                 runtime=runtime_root.joinpath(self.topic, self.item),
+             )
+
+         return self._paths
+
+     @property
+     @lru_cache(maxsize=None)
+     def options(self) -> Options:
+         if not self._options:
+             job = self.conf.options or {}
+             table = self.conf.table_options or {}
+             check = self.conf.check_options or {}
+             spark = self.conf.spark_options or {}
+             invokers = self.conf.invoker_options or {}
+             extenders = self.conf.extender_options or []
+
+             self._options = Options(
+                 job=FDict(job),
+                 table=FDict(table),
+                 check=FDict(check),
+                 spark=FDict(spark),
+                 invokers=FDict(invokers),
+                 extenders=extenders,
+             )
+         return self._options
+
+     @property
+     def change_data_capture(self) -> AllowedChangeDataCaptures:
+         if not self._change_data_capture:
+             cdc: AllowedChangeDataCaptures = self.options.job.get("change_data_capture") or "nocdc"
+             self._change_data_capture = cdc
+         return self._change_data_capture
+
+     @property
+     def cdc(self) -> Union[NoCDC, SCD1, SCD2]:
+         if not self._cdc:
+             if self.change_data_capture == "nocdc":
+                 cdc = NoCDC(self.step, self.topic, self.item, spark=self.spark)
+             elif self.change_data_capture == "scd1":
+                 cdc = SCD1(self.step, self.topic, self.item, spark=self.spark)
+             elif self.change_data_capture == "scd2":
+                 cdc = SCD2(self.step, self.topic, self.item, spark=self.spark)
+             else:
+                 raise ValueError(f"{self.change_data_capture} not allowed")
+             self._cdc = cdc
+         return self._cdc
+
+     @property
+     def slowly_changing_dimension(self) -> bool:
+         return self.change_data_capture in ["scd1", "scd2"]
+
+     @abstractmethod
+     def get_cdc_context(self, df: DataFrame, reload: Optional[bool] = False) -> dict: ...
+
+     def get_cdc_data(self, stream: bool = False) -> Optional[DataFrame]:
+         df = self.get_data(stream=stream)
+         if df:
+             cdc_context = self.get_cdc_context(df)
+             cdc_df = self.cdc.get_data(src=df, **cdc_context)
+             return cdc_df
+
+     @property
+     def mode(self) -> AllowedModes:
+         if not self._mode:
+             _mode = self.options.job.get("mode")
+             assert _mode is not None
+             self._mode = cast(AllowedModes, _mode)
+         return self._mode
+
+     @abstractmethod
+     def get_data(self, stream: bool = False, transform: Optional[bool] = None, **kwargs) -> Optional[DataFrame]: ...
+
+     @abstractmethod
+     def for_each_batch(self, df: DataFrame, batch: Optional[int] = None, **kwargs): ...
+
+     @abstractmethod
+     def for_each_run(self, **kwargs): ...
+
+     @abstractmethod
+     def base_transform(self, df: DataFrame) -> DataFrame: ...
+
+     @abstractmethod
+     def run(
+         self,
+         retry: Optional[bool] = True,
+         schedule: Optional[str] = None,
+         schedule_id: Optional[str] = None,
+         invoke: Optional[bool] = True,
+     ): ...
+
+     @deprecated("use maintain instead")
+     def optimize(
+         self,
+         vacuum: Optional[bool] = True,
+         optimize: Optional[bool] = True,
+         analyze: Optional[bool] = True,
+     ):
+         return self.maintain(
+             vacuum=vacuum,
+             optimize=optimize,
+             compute_statistics=analyze,
+         )
+
+     def maintain(
+         self,
+         vacuum: Optional[bool] = True,
+         optimize: Optional[bool] = True,
+         compute_statistics: Optional[bool] = True,
+     ):
+         if self.mode == "memory":
+             DEFAULT_LOGGER.debug("could not maintain (memory)", extra={"label": self})
+
+         else:
+             if vacuum:
+                 self.vacuum()
+             if optimize:
+                 self.cdc.optimize_table()
+             if compute_statistics:
+                 self.table.compute_statistics()
+
+     def vacuum(self):
+         if self.mode == "memory":
+             DEFAULT_LOGGER.debug("could not vacuum (memory)", extra={"label": self})
+
+         else:
+             job = self.options.table.get("retention_days")
+             step = self.step_conf.get("table_options", {}).get("retention_days", None)
+             runtime = CONF_RUNTIME.get("options", {}).get("retention_days")
+
+             if job is not None:
+                 retention_days = job
+             elif step:
+                 retention_days = step
+             else:
+                 assert runtime
+                 retention_days = runtime
+
+             self.table.vacuum(retention_days=retention_days)
+
+     def __str__(self):
+         return f"{self.step}.{self.topic}_{self.item}"
fabricks/core/jobs/base/exception.py
@@ -0,0 +1,85 @@
+ from typing import Sequence
+
+ from pyspark.sql import DataFrame
+
+ from fabricks.metastore.table import SchemaDiff
+
+
+ class CustomException(Exception):
+     pass
+
+
+ class CheckException(Exception):
+     def __init__(self, message: str, dataframe: DataFrame):
+         self.message = message
+         self.dataframe = dataframe
+
+         super().__init__(self.message)
+
+
+ class CheckWarning(CheckException):
+     pass
+
+
+ class PreRunCheckException(CheckException):
+     pass
+
+
+ class PostRunCheckException(CheckException):
+     pass
+
+
+ class PreRunCheckWarning(CheckWarning):
+     pass
+
+
+ class PostRunCheckWarning(CheckWarning):
+     pass
+
+
+ class PreRunInvokeException(CustomException):
+     pass
+
+
+ class PostRunInvokeException(CustomException):
+     pass
+
+
+ class SkipRunCheckWarning(CheckWarning):
+     pass
+
+
+ class SchemaDriftException(Exception):
+     @staticmethod
+     def from_diffs(table: str, diffs: Sequence[SchemaDiff]):
+         out = []
+         type_widening_compatible = True
+
+         added = [d.new_column or d.column for d in diffs if d.status == "added"]
+         if added:
+             type_widening_compatible = False
+             out.append("added columns:\n" + "\n".join(f"\t- {col}" for col in added))
+
+         removed = [d.column for d in diffs if d.status == "dropped"]
+         if removed:
+             type_widening_compatible = False
+             out.append("removed columns:\n" + "\n".join(f"\t- {col}" for col in removed))
+
+         changed = [f"{d.column} ({d.data_type} -> {d.new_data_type})" for d in diffs if d.status == "changed"]
+         if changed:
+             if False in [d.type_widening_compatible for d in diffs if d.status == "changed"]:
+                 type_widening_compatible = False
+
+             out.append("changed columns:\n" + "\n".join(f"\t- {col}" for col in changed))
+
+         out = "\n".join(out)
+
+         if type_widening_compatible:
+             return SchemaDriftException(f"type widening detected:\n {out}", diffs, type_widening_compatible)
+         else:
+             return SchemaDriftException(f"schema drift detected:\n {out}", diffs, type_widening_compatible)
+
+     def __init__(self, message: str, diffs: Sequence[SchemaDiff], type_widening_compatible: bool = False):
+         super().__init__(message)
+         self.diffs = diffs
+         self.type_widening_compatible = type_widening_compatible
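
The module splits failures into hard stops (CheckException and its pre/post-run subclasses) and soft ones (the CheckWarning subclasses), while SchemaDriftException.from_diffs turns a list of SchemaDiff records into a message plus a type_widening_compatible flag that callers can branch on. A hedged sketch of that branching, using a stand-in dataclass for SchemaDiff (its real constructor is not shown in this diff) and assuming the module is importable from its file path:

from dataclasses import dataclass
from typing import Optional

from fabricks.core.jobs.base.exception import SchemaDriftException


@dataclass
class FakeSchemaDiff:
    # stand-in for fabricks.metastore.table.SchemaDiff; from_diffs reads exactly
    # these attributes, but the real class may be constructed differently
    column: str
    status: str  # "added" | "dropped" | "changed"
    data_type: Optional[str] = None
    new_data_type: Optional[str] = None
    new_column: Optional[str] = None
    type_widening_compatible: bool = False


diffs = [
    FakeSchemaDiff(
        column="amount",
        status="changed",
        data_type="int",
        new_data_type="bigint",
        type_widening_compatible=True,
    )
]

# only widening-compatible type changes -> message reads "type widening detected"
# and type_widening_compatible stays True, so a caller may auto-evolve the schema
# instead of failing the run
exc = SchemaDriftException.from_diffs(table="gold.fact_sales", diffs=diffs)
assert exc.type_widening_compatible
print(exc)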