fabricks-3.0.19-py3-none-any.whl → fabricks-4.0.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- fabricks/api/context.py +15 -3
- fabricks/api/notebooks/schedule.py +2 -3
- fabricks/api/parsers.py +2 -1
- fabricks/api/utils.py +3 -1
- fabricks/cdc/__init__.py +1 -2
- fabricks/cdc/base/__init__.py +1 -2
- fabricks/cdc/base/_types.py +5 -3
- fabricks/cdc/base/configurator.py +5 -0
- fabricks/cdc/base/generator.py +7 -3
- fabricks/cdc/base/merger.py +2 -0
- fabricks/cdc/base/processor.py +15 -0
- fabricks/cdc/templates/README.md +490 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
- fabricks/cdc/templates/queries/context.sql.jinja +104 -96
- fabricks/cdc/templates/query.sql.jinja +1 -1
- fabricks/context/__init__.py +13 -1
- fabricks/context/config.py +13 -122
- fabricks/context/log.py +92 -1
- fabricks/context/runtime.py +35 -69
- fabricks/context/spark_session.py +8 -7
- fabricks/context/utils.py +26 -39
- fabricks/core/__init__.py +2 -2
- fabricks/core/dags/base.py +5 -5
- fabricks/core/dags/processor.py +2 -3
- fabricks/core/extenders.py +1 -1
- fabricks/core/job_schema.py +26 -16
- fabricks/core/jobs/__init__.py +1 -7
- fabricks/core/jobs/base/README.md +1545 -0
- fabricks/core/jobs/base/__init__.py +1 -8
- fabricks/core/jobs/base/checker.py +7 -7
- fabricks/core/jobs/base/configurator.py +142 -63
- fabricks/core/jobs/base/generator.py +38 -34
- fabricks/core/jobs/base/invoker.py +48 -63
- fabricks/core/jobs/base/processor.py +13 -28
- fabricks/core/jobs/bronze.py +88 -38
- fabricks/core/jobs/get_job.py +3 -6
- fabricks/core/jobs/get_job_conf.py +19 -68
- fabricks/core/jobs/get_jobs.py +10 -11
- fabricks/core/jobs/get_schedules.py +3 -17
- fabricks/core/jobs/gold.py +89 -47
- fabricks/core/jobs/silver.py +42 -22
- fabricks/core/masks.py +11 -8
- fabricks/core/parsers/__init__.py +0 -2
- fabricks/core/parsers/base.py +10 -10
- fabricks/core/parsers/decorator.py +1 -1
- fabricks/core/parsers/get_parser.py +4 -5
- fabricks/core/schedules/process.py +1 -4
- fabricks/core/steps/base.py +27 -17
- fabricks/core/steps/get_step.py +2 -4
- fabricks/core/steps/get_step_conf.py +3 -7
- fabricks/core/udfs.py +7 -7
- fabricks/core/views.py +2 -2
- fabricks/deploy/__init__.py +27 -16
- fabricks/deploy/masks.py +1 -1
- fabricks/deploy/notebooks.py +19 -16
- fabricks/deploy/schedules.py +1 -1
- fabricks/deploy/tables.py +66 -49
- fabricks/deploy/udfs.py +2 -2
- fabricks/deploy/views.py +15 -16
- fabricks/metastore/database.py +3 -3
- fabricks/metastore/table.py +103 -68
- fabricks/models/__init__.py +125 -0
- fabricks/models/common.py +79 -0
- fabricks/models/config.py +225 -0
- fabricks/models/dependency.py +50 -0
- fabricks/models/job.py +157 -0
- fabricks/models/path.py +17 -0
- fabricks/models/runtime.py +182 -0
- fabricks/models/schedule.py +21 -0
- fabricks/models/step.py +103 -0
- fabricks/models/table.py +77 -0
- fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
- fabricks/utils/helpers.py +6 -5
- fabricks/utils/log.py +25 -6
- fabricks/utils/path.py +265 -108
- fabricks/utils/pip.py +7 -7
- fabricks/utils/read/read.py +23 -22
- fabricks/utils/read/read_yaml.py +2 -2
- fabricks/utils/write/delta.py +4 -4
- fabricks/utils/write/stream.py +2 -2
- {fabricks-3.0.19.dist-info → fabricks-4.0.0.dist-info}/METADATA +9 -4
- {fabricks-3.0.19.dist-info → fabricks-4.0.0.dist-info}/RECORD +86 -83
- fabricks/context/_types.py +0 -139
- fabricks/context/helpers.py +0 -63
- fabricks/core/jobs/base/_types.py +0 -284
- fabricks/core/parsers/_types.py +0 -6
- fabricks/utils/fdict.py +0 -240
- fabricks/utils/pydantic.py +0 -94
- fabricks/utils/schema/__init__.py +0 -7
- fabricks/utils/schema/get_json_schema_for_type.py +0 -161
- fabricks/utils/schema/get_schema_for_type.py +0 -99
- {fabricks-3.0.19.dist-info → fabricks-4.0.0.dist-info}/WHEEL +0 -0
fabricks/deploy/views.py
CHANGED
@@ -1,11 +1,10 @@
-from fabricks.context import SPARK
+from fabricks.context import SPARK, Steps
 from fabricks.context.log import DEFAULT_LOGGER
-from fabricks.core.jobs.base._types import Steps
 from fabricks.utils.sqlglot import fix as fix_sql


 def deploy_views():
-    DEFAULT_LOGGER.info("create or replace fabricks (default) views")
+    DEFAULT_LOGGER.info("create or replace fabricks (default) views", extra={"label": "fabricks"})

     create_or_replace_jobs_view()
     create_or_replace_tables_view()
@@ -69,7 +68,7 @@ def create_or_replace_jobs_view():
            dmls.append(dml)

        except Exception:
-            DEFAULT_LOGGER.debug(f"could not find fabricks.{table}")
+            DEFAULT_LOGGER.debug(f"could not find fabricks.{table}", extra={"label": "fabricks"})

    sql = f"""create or replace view fabricks.jobs with schema evolution as {" union all ".join(dmls)}"""
    sql = fix_sql(sql)
@@ -96,7 +95,7 @@ def create_or_replace_tables_view():
            dmls.append(dml)

        except Exception:
-            DEFAULT_LOGGER.debug(f"could not find fabricks.{step}_tables")
+            DEFAULT_LOGGER.debug(f"could not find fabricks.{step}_tables", extra={"label": "fabricks"})

    sql = f"""create or replace view fabricks.tables with schema evolution as {" union all ".join(dmls)}"""
    sql = fix_sql(sql)
@@ -123,7 +122,7 @@ def create_or_replace_views_view():
            dmls.append(dml)

        except Exception:
-            DEFAULT_LOGGER.debug(f"could not find fabricks.{step}_views")
+            DEFAULT_LOGGER.debug(f"could not find fabricks.{step}_views", extra={"label": "fabricks"})

    sql = f"""create or replace view fabricks.views with schema evolution as {" union all ".join(dmls)}"""
    sql = fix_sql(sql)
@@ -153,7 +152,7 @@ def create_or_replace_dependencies_view():
            dmls.append(dml)

        except Exception:
-            DEFAULT_LOGGER.debug(f"could not find fabricks.{step}_dependencies")
+            DEFAULT_LOGGER.debug(f"could not find fabricks.{step}_dependencies", extra={"label": "fabricks"})

    sql = f"""create or replace view fabricks.dependencies with schema evolution as {" union all ".join(dmls)}"""
    sql = fix_sql(sql)
@@ -180,7 +179,7 @@ def create_or_replace_dependencies_flat_view():
    """
    sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.dependencies_flat", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.dependencies_flat", extra={"sql": sql, "label": "fabricks"})
    SPARK.sql(sql)


@@ -221,7 +220,7 @@ def create_or_replace_dependencies_unpivot_view():
    """
    sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.dependencies_unpivot", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.dependencies_unpivot", extra={"sql": sql, "label": "fabricks"})
    SPARK.sql(sql)


@@ -262,7 +261,7 @@ def create_or_replace_dependencies_circular_view():
    """
    sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.dependencies_circular", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.dependencies_circular", extra={"sql": sql, "label": "fabricks"})
    SPARK.sql(sql)


@@ -334,7 +333,7 @@ def create_or_replace_logs_pivot_view():
    """
    sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.logs_pivot", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.logs_pivot", extra={"sql": sql, "label": "fabricks"})
    SPARK.sql(sql)


@@ -361,7 +360,7 @@ def create_or_replace_last_schedule_view():
    """
    sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.last_schedule", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.last_schedule", extra={"sql": sql, "label": "fabricks"})
    SPARK.sql(sql)


@@ -388,7 +387,7 @@ def create_or_replace_last_status_view():
    """
    sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.last_status", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.last_status", extra={"sql": sql, "label": "fabricks"})
    SPARK.sql(sql)


@@ -427,7 +426,7 @@ def create_or_replace_previous_schedule_view():
    """
    sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.previous_schedule", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.previous_schedule", extra={"sql": sql, "label": "fabricks"})
    SPARK.sql(sql)


@@ -453,7 +452,7 @@ def create_or_replace_schedules_view():
    """
    sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.schedules", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.schedules", extra={"sql": sql, "label": "fabricks"})
    SPARK.sql(sql)


@@ -510,5 +509,5 @@ def create_or_replace_jobs_to_be_updated_view():
    """
    sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.jobs_to_be_updated", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.jobs_to_be_updated", extra={"sql": sql, "label": "fabricks"})
    SPARK.sql(sql)
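The only functional change in this file is routing every `DEFAULT_LOGGER` call through `extra={"label": "fabricks"}` (plus importing `Steps` from `fabricks.context`, since `fabricks.core.jobs.base._types` is removed in 4.0.0). The diff does not show how fabricks' handler consumes the label; the sketch below, with a hypothetical `LabelFormatter`, only illustrates the standard-library mechanism that `extra=` relies on: each key becomes an attribute on the `LogRecord`.

```python
import logging

# Hypothetical formatter, not fabricks code: shows how a key passed via
# `extra=` surfaces as an attribute on the log record.
class LabelFormatter(logging.Formatter):
    def format(self, record: logging.LogRecord) -> str:
        label = getattr(record, "label", "-")  # set by extra={"label": ...}
        return f"[{label}] {record.getMessage()}"

logger = logging.getLogger("fabricks")
handler = logging.StreamHandler()
handler.setFormatter(LabelFormatter())
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

logger.info("create or replace fabricks (default) views", extra={"label": "fabricks"})
# -> [fabricks] create or replace fabricks (default) views
```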
fabricks/metastore/database.py
CHANGED
@@ -7,7 +7,7 @@ from typing_extensions import deprecated
 from fabricks.context import PATHS_STORAGE, SPARK
 from fabricks.context.log import DEFAULT_LOGGER
 from fabricks.metastore.utils import get_tables, get_views
-from fabricks.utils.path import
+from fabricks.utils.path import FileSharePath


 class Database:
@@ -25,11 +25,11 @@ class Database:

     @property
     @deprecated("use delta_path instead")
-    def deltapath(self) ->
+    def deltapath(self) -> FileSharePath:
         return self.storage.joinpath("delta")

     @property
-    def delta_path(self) ->
+    def delta_path(self) -> FileSharePath:
         return self.storage.joinpath("delta")

     def create(self):
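Both path properties now declare a concrete `FileSharePath` return type, and the old `deltapath` spelling stays available behind `typing_extensions.deprecated`. A minimal sketch of that alias pattern, using a hypothetical `Storage` class rather than the real `Database`:

```python
from typing_extensions import deprecated

class Storage:
    """Hypothetical stand-in; only the decorator pattern mirrors the diff."""

    @property
    @deprecated("use delta_path instead")
    def deltapath(self) -> str:
        # old spelling keeps working, but call sites are flagged by type
        # checkers and warned at runtime
        return self.delta_path

    @property
    def delta_path(self) -> str:
        return "/mnt/storage/delta"

Storage().deltapath  # DeprecationWarning: use delta_path instead
```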
fabricks/metastore/table.py
CHANGED
@@ -1,5 +1,5 @@
 import re
-from typing import
+from typing import Sequence, overload

 from delta import DeltaTable
 from pyspark.errors.exceptions.base import AnalysisException
@@ -11,21 +11,22 @@ from fabricks.context import SPARK
 from fabricks.context.log import DEFAULT_LOGGER
 from fabricks.metastore._types import AddedColumn, ChangedColumn, DroppedColumn, SchemaDiff
 from fabricks.metastore.dbobject import DbObject
-from fabricks.
+from fabricks.models import ForeignKey, PrimaryKey
+from fabricks.utils.path import FileSharePath
 from fabricks.utils.sqlglot import fix


 class Table(DbObject):
     @classmethod
-    def from_step_topic_item(cls, step: str, topic: str, item: str, spark:
+    def from_step_topic_item(cls, step: str, topic: str, item: str, spark: SparkSession | None = SPARK):
         return cls(step, topic, item, spark=spark)

     @property
-    def deltapath(self) ->
+    def deltapath(self) -> FileSharePath:
         return self.database.delta_path.joinpath("/".join(self.levels))

     @property
-    def delta_path(self) ->
+    def delta_path(self) -> FileSharePath:
         return self.database.delta_path.joinpath("/".join(self.levels))

     @property
@@ -43,7 +44,7 @@ class Table(DbObject):
         return self.spark.sql(f"select * from {self}")

     @property
-    def columns(self) ->
+    def columns(self) -> list[str]:
         assert self.registered, f"{self} not registered"

         return self.dataframe.columns
@@ -98,16 +99,16 @@ class Table(DbObject):
         self,
         df: DataFrame,
         *,
-        partitioning:
-        partition_by:
-        identity:
-        liquid_clustering:
-        cluster_by:
-        properties:
-        masks:
-        primary_key:
-        foreign_keys:
-        comments:
+        partitioning: bool | None = False,
+        partition_by: list[str] | str | None = None,
+        identity: bool | None = False,
+        liquid_clustering: bool | None = False,
+        cluster_by: list[str] | str | None = None,
+        properties: dict[str, str | bool | int] | None = None,
+        masks: dict[str, str] | None = None,
+        primary_key: dict[str, PrimaryKey] | None = None,
+        foreign_keys: dict[str, ForeignKey] | None = None,
+        comments: dict[str, str] | None = None,
     ): ...

     @overload
@@ -115,32 +116,32 @@ class Table(DbObject):
         self,
         *,
         schema: StructType,
-        partitioning:
-        partition_by:
-        identity:
-        liquid_clustering:
-        cluster_by:
-        properties:
-        masks:
-        primary_key:
-        foreign_keys:
-        comments:
+        partitioning: bool | None = False,
+        partition_by: list[str] | str | None = None,
+        identity: bool | None = False,
+        liquid_clustering: bool | None = False,
+        cluster_by: list[str] | str | None = None,
+        properties: dict[str, str | bool | int] | None = None,
+        masks: dict[str, str] | None = None,
+        primary_key: dict[str, PrimaryKey] | None = None,
+        foreign_keys: dict[str, ForeignKey] | None = None,
+        comments: dict[str, str] | None = None,
     ): ...

     def create(
         self,
-        df:
-        schema:
-        partitioning:
-        partition_by:
-        identity:
-        liquid_clustering:
-        cluster_by:
-        properties:
-        masks:
-        primary_key:
-        foreign_keys:
-        comments:
+        df: DataFrame | None = None,
+        schema: StructType | None = None,
+        partitioning: bool | None = False,
+        partition_by: list[str] | str | None = None,
+        identity: bool | None = False,
+        liquid_clustering: bool | None = False,
+        cluster_by: list[str] | str | None = None,
+        properties: dict[str, str | bool | int] | None = None,
+        masks: dict[str, str] | None = None,
+        primary_key: dict[str, PrimaryKey] | None = None,
+        foreign_keys: dict[str, ForeignKey] | None = None,
+        comments: dict[str, str] | None = None,
     ):
         self._create(
             df=df,
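With the one-line 3.x signatures expanded into explicit unions, `create` now accepts either a `DataFrame` or a bare `StructType`, and constraints arrive as typed models instead of raw dicts. Below is a hypothetical call site under the new signature; `table` is assumed to be an existing `Table` instance, and the `PrimaryKey`/`ForeignKey` constructor fields (`keys`, `reference`) are inferred from the attribute accesses later in this diff, not confirmed against the real pydantic models:

```python
from pyspark.sql.types import LongType, StringType, StructField, StructType

from fabricks.models import ForeignKey, PrimaryKey

schema = StructType(
    [
        StructField("id", LongType(), nullable=False),
        StructField("customer_id", LongType()),
        StructField("name", StringType()),
    ]
)

# `table` is an existing fabricks.metastore.table.Table instance
table.create(
    schema=schema,  # schema-only creation; no DataFrame needed in 4.0.0
    liquid_clustering=True,
    cluster_by=["id"],
    properties={"delta.enableChangeDataFeed": True},
    primary_key={"pk_customer": PrimaryKey(keys=["id"])},  # field names assumed
    foreign_keys={"fk_customer": ForeignKey(keys=["customer_id"], reference="gold.customer")},
    comments={"id": "surrogate key"},
)
```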
@@ -158,8 +159,11 @@ class Table(DbObject):
         )

     def _get_ddl_columns(
-        self,
-
+        self,
+        df: DataFrame,
+        masks: dict[str, str] | None,
+        comments: dict[str, str] | None,
+    ) -> list[str]:
         def _backtick(name: str, dtype: str) -> str:
             j = df.schema[name].jsonValue()
             r = re.compile(r"(?<='name': ')[^']+(?=',)")
@@ -188,18 +192,18 @@ class Table(DbObject):

     def _create(
         self,
-        df:
-        schema:
-        partitioning:
-        partition_by:
-        identity:
-        liquid_clustering:
-        cluster_by:
-        properties:
-        masks:
-        primary_key:
-        foreign_keys:
-        comments:
+        df: DataFrame | None = None,
+        schema: StructType | None = None,
+        partitioning: bool | None = False,
+        partition_by: list[str] | str | None = None,
+        identity: bool | None = False,
+        liquid_clustering: bool | None = False,
+        cluster_by: list[str] | str | None = None,
+        properties: dict[str, str | bool | int] | None = None,
+        masks: dict[str, str] | None = None,
+        primary_key: dict[str, PrimaryKey] | None = None,
+        foreign_keys: dict[str, ForeignKey] | None = None,
+        comments: dict[str, str] | None = None,
     ):
         DEFAULT_LOGGER.info("create table", extra={"label": self})
         if not df:
@@ -238,19 +242,21 @@ class Table(DbObject):
         assert len(primary_key) == 1, "only one primary key allowed"

         for key, value in primary_key.items():
-            keys = value
+            keys = value.keys
             if isinstance(keys, str):
                 keys = [keys]
+
             ddl_primary_key = f", constraint {key} primary key (" + ", ".join(keys) + ")"

         if foreign_keys:
             fks = []

             for key, value in foreign_keys.items():
-                reference = value
-                keys = value
+                reference = value.reference
+                keys = value.keys
                 if isinstance(keys, str):
                     keys = [keys]
+
                 keys = ", ".join([f"`{k}`" for k in keys])
                 fk = f"constraint {key} foreign key ({keys}) references {reference}"
                 fks.append(fk)
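In 3.x the constraint values were plain dicts; 4.0.0 reads `value.keys` and `value.reference` off typed models. A self-contained sketch of the DDL assembly above, with dataclass stand-ins for the real `fabricks.models` classes:

```python
from dataclasses import dataclass

@dataclass
class PrimaryKey:  # stand-in; the real model lives in fabricks.models
    keys: str | list[str]

@dataclass
class ForeignKey:  # stand-in; the real model lives in fabricks.models
    keys: str | list[str]
    reference: str

def ddl_foreign_keys(foreign_keys: dict[str, ForeignKey]) -> list[str]:
    fks = []
    for name, fk in foreign_keys.items():
        keys = [fk.keys] if isinstance(fk.keys, str) else fk.keys
        cols = ", ".join(f"`{k}`" for k in keys)
        fks.append(f"constraint {name} foreign key ({cols}) references {fk.reference}")
    return fks

print(ddl_foreign_keys({"fk_customer": ForeignKey(keys="customer_id", reference="gold.customer")}))
# ['constraint fk_customer foreign key (`customer_id`) references gold.customer']
```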
@@ -301,7 +307,13 @@ class Table(DbObject):

     @property
     def is_deltatable(self) -> bool:
-
+        try:
+            return DeltaTable.isDeltaTable(self.spark, str(self.delta_path))
+        except Exception as e:
+            if "PERMISSION_DENIED" in str(e) or "row filter or column mask" in str(e):
+                return True
+            else:
+                raise e

     @property
     def column_mapping_enabled(self) -> bool:
@@ -329,13 +341,17 @@ class Table(DbObject):
         self.create_restore_point()
         self.spark.sql(f"truncate table {self.qualified_name}")

-    def schema_drifted(self, df: DataFrame, exclude_columns_with_prefix:
+    def schema_drifted(self, df: DataFrame, exclude_columns_with_prefix: list[str] | None = None) -> bool:
         assert self.registered, f"{self} not registered"

-        diffs = self.get_schema_differences(df)
+        diffs = self.get_schema_differences(df, exclude_columns_with_prefix=exclude_columns_with_prefix)
         return len(diffs) > 0

-    def get_schema_differences(
+    def get_schema_differences(
+        self,
+        df: DataFrame,
+        exclude_columns_with_prefix: list[str] | None = None,
+    ) -> Sequence[SchemaDiff]:
         assert self.registered, f"{self} not registered"

         DEFAULT_LOGGER.debug("get schema differences", extra={"label": self, "df": df})
@@ -346,6 +362,9 @@ class Table(DbObject):
         df1 = df1.drop("__identity")

         all_columns = set(df1.columns).union(set(df.columns))
+        if exclude_columns_with_prefix:
+            for excluded in exclude_columns_with_prefix:
+                all_columns = {c for c in all_columns if not c.startswith(excluded)}

         df1_dict = {name: dtype for name, dtype in df1.dtypes}
         df2_dict = {name: dtype for name, dtype in df.dtypes}
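The new `exclude_columns_with_prefix` parameter drops matching columns from the comparison set before drift is computed, so framework-managed columns no longer trigger schema updates. The filter is easy to replay in isolation (the `"__"` prefix here is an illustrative choice, not a fabricks default confirmed by this diff):

```python
target_columns = {"id", "name", "__timestamp"}
incoming_columns = {"id", "name", "__source"}

all_columns = target_columns | incoming_columns
for prefix in ["__"]:
    # same comprehension as in get_schema_differences above
    all_columns = {c for c in all_columns if not c.startswith(prefix)}

print(sorted(all_columns))  # ['id', 'name'] -> the __ columns never count as drift
```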
@@ -378,8 +397,16 @@ class Table(DbObject):

         return diffs

-    def update_schema(self, df: DataFrame, widen_types: bool = False):
+    def update_schema(self, df: DataFrame | None = None, schema: StructType | None = None, widen_types: bool = False):
+        if df is None and schema is None:
+            raise ValueError("Either df or schema must be provided")
+
+        if df is None and schema is not None:
+            df = self.spark.createDataFrame([], schema)
+
+        assert df is not None
         assert self.registered, f"{self} not registered"
+
         if not self.column_mapping_enabled:
             self.enable_column_mapping()

@@ -428,8 +455,16 @@ class Table(DbObject):
         except Exception:
             pass

-    def overwrite_schema(self, df: DataFrame):
+    def overwrite_schema(self, df: DataFrame | None = None, schema: StructType | None = None):
+        if df is None and schema is None:
+            raise ValueError("Either df or schema must be provided")
+
+        if df is None and schema is not None:
+            df = self.spark.createDataFrame([], schema)
+
+        assert df is not None
         assert self.registered, f"{self} not registered"
+
         if not self.column_mapping_enabled:
             self.enable_column_mapping()
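`update_schema` and `overwrite_schema` follow the same df-or-schema convention as `create`: a bare `StructType` is materialized into an empty DataFrame internally, and passing neither raises. A hypothetical call site, with `table` again assumed to be an existing `Table` instance:

```python
from pyspark.sql.types import StringType, StructField, StructType

new_schema = StructType(
    [
        StructField("id", StringType()),
        StructField("name", StringType()),
        StructField("country", StringType()),  # column to be added
    ]
)

table.update_schema(schema=new_schema, widen_types=True)

# table.update_schema()  # ValueError: Either df or schema must be provided
```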
@@ -473,7 +508,7 @@ class Table(DbObject):
             pass
         self.spark.sql("SET self.spark.databricks.delta.retentionDurationCheck.enabled = True")

-    def optimize(self, columns:
+    def optimize(self, columns: str | list[str] | None = None):
         assert self.registered, f"{self} not registered"

         DEFAULT_LOGGER.info("optimize", extra={"label": self})
@@ -579,7 +614,7 @@ class Table(DbObject):

         return self.spark.sql(f"describe detail {self.qualified_name}")

-    def get_partitions(self) ->
+    def get_partitions(self) -> list[str]:
         assert self.registered, f"{self} not registered"

         try:
@@ -610,7 +645,7 @@ class Table(DbObject):
         version = df.select(max("version")).collect()[0][0]
         return version

-    def get_property(self, key: str) ->
+    def get_property(self, key: str) -> str | None:
         assert self.registered, f"{self} not registered"

         try:
@@ -652,7 +687,7 @@ class Table(DbObject):
             """
         )

-    def set_property(self, key:
+    def set_property(self, key: str | int, value: str | int):
         assert self.registered, f"{self} not registered"

         DEFAULT_LOGGER.debug(f"set property {key} = {value}", extra={"label": self})
@@ -735,7 +770,7 @@ class Table(DbObject):
             """
         )

-    def add_column(self, name: str, type: str, after:
+    def add_column(self, name: str, type: str, after: str | None = None):
         assert self.registered, f"{self} not registered"

         DEFAULT_LOGGER.info(f"add column {name} ({type})", extra={"label": self})
@@ -747,7 +782,7 @@ class Table(DbObject):
             """
         )

-    def create_bloomfilter_index(self, columns:
+    def create_bloomfilter_index(self, columns: str | list[str]):
         assert self.registered, f"{self} not registered"

         if isinstance(columns, str):
@@ -790,7 +825,7 @@ class Table(DbObject):
         df = self.spark.sql(f"describe history {self.qualified_name}")
         return df

-    def enable_liquid_clustering(self, columns:
+    def enable_liquid_clustering(self, columns: str | list[str] | None = None, auto: bool | None = False):
         assert self.registered, f"{self} not registered"

         if auto:
fabricks/models/__init__.py
ADDED
@@ -0,0 +1,125 @@
+"""Fabricks models module - All Pydantic models for jobs, steps, and runtime configuration."""
+
+# Common types and aliases
+from fabricks.models.common import (
+    AllowedChangeDataCaptures,
+    AllowedConstraintOptions,
+    AllowedFileFormats,
+    AllowedForeignKeyOptions,
+    AllowedModes,
+    AllowedModesBronze,
+    AllowedModesGold,
+    AllowedModesSilver,
+    AllowedOperations,
+    AllowedOrigins,
+    AllowedTypes,
+    Database,
+    DatabasePathOptions,
+    ExtenderOptions,
+    InvokerOptions,
+    SparkOptions,
+)
+from fabricks.models.dependency import JobDependency, SchemaDependencies
+
+# Job models
+from fabricks.models.job import BronzeOptions as JobBronzeOptions
+from fabricks.models.job import CheckOptions
+from fabricks.models.job import GoldOptions as JobGoldOptions
+from fabricks.models.job import JobConf, JobConfBase, JobConfBronze, JobConfGold, JobConfSilver, ParserOptions
+from fabricks.models.job import SilverOptions as JobSilverOptions
+from fabricks.models.job import TOptions
+from fabricks.models.path import Paths
+
+# Runtime models
+from fabricks.models.runtime import RuntimeConf, RuntimeOptions, RuntimePathOptions, RuntimeTimeoutOptions
+
+# Schedule models
+from fabricks.models.schedule import Schedule, ScheduleOptions
+
+# Step models
+from fabricks.models.step import BronzeConf as StepBronzeConf
+from fabricks.models.step import BronzeOptions as StepBronzeOptions
+from fabricks.models.step import GoldConf as StepGoldConf
+from fabricks.models.step import GoldOptions as StepGoldOptions
+from fabricks.models.step import PowerBI
+from fabricks.models.step import SilverConf as StepSilverConf
+from fabricks.models.step import SilverOptions as StepSilverOptions
+from fabricks.models.step import Step, StepOptions, StepPathOptions, StepTimeoutOptions
+
+# Table models
+from fabricks.models.table import (
+    ForeignKey,
+    ForeignKeyOptions,
+    PrimaryKey,
+    PrimaryKeyOptions,
+    StepTableOptions,
+    TableOptions,
+)
+
+# Utility functions
+from fabricks.models.utils import get_dependency_id, get_job_id
+
+__all__ = [
+    # Common types
+    "AllowedChangeDataCaptures",
+    "AllowedConstraintOptions",
+    "AllowedFileFormats",
+    "AllowedForeignKeyOptions",
+    "AllowedModes",
+    "AllowedModesBronze",
+    "AllowedModesGold",
+    "AllowedModesSilver",
+    "AllowedOperations",
+    "AllowedOrigins",
+    "AllowedTypes",
+    "Database",
+    "DatabasePathOptions",
+    "ExtenderOptions",
+    "SparkOptions",
+    # Job models
+    "CheckOptions",
+    "InvokerOptions",
+    "JobBronzeOptions",
+    "JobConf",
+    "JobConfBase",
+    "JobConfBronze",
+    "JobConfGold",
+    "JobConfSilver",
+    "JobDependency",
+    "JobGoldOptions",
+    "JobSilverOptions",
+    "Paths",
+    "SchemaDependencies",
+    "TOptions",
+    # Runtime models
+    "RuntimeConf",
+    "RuntimeOptions",
+    "RuntimePathOptions",
+    "RuntimeTimeoutOptions",
+    # Step models
+    "PowerBI",
+    "Step",
+    "StepBronzeConf",
+    "StepBronzeOptions",
+    "StepGoldConf",
+    "StepGoldOptions",
+    "StepOptions",
+    "StepPathOptions",
+    "StepSilverConf",
+    "StepSilverOptions",
+    "StepTimeoutOptions",
+    # Table models
+    "ForeignKey",
+    "ForeignKeyOptions",
+    "PrimaryKey",
+    "PrimaryKeyOptions",
+    "StepTableOptions",
+    "TableOptions",
+    "ParserOptions",
+    # Schedule models
+    "ScheduleOptions",
+    "Schedule",
+    # Utility functions
+    "get_dependency_id",
+    "get_job_id",
+]
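The new package consolidates what 3.x spread across `fabricks/context/_types.py`, `fabricks/core/jobs/base/_types.py`, `fabricks/core/parsers/_types.py`, and `fabricks/utils/pydantic.py` (all deleted above), with `get_job_id` relocated from `fabricks/core/jobs/get_job_id.py` to `fabricks/models/utils.py`. Imports now come from one place; whether the old paths keep working through shims is not shown in this diff:

```python
# 4.0.0-style imports, mirroring the __all__ list above
from fabricks.models import (
    ForeignKey,
    JobConf,
    PrimaryKey,
    RuntimeConf,
    Schedule,
    Step,
    get_job_id,
)

# the Steps alias moved too (see fabricks/deploy/views.py above)
from fabricks.context import Steps
```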