fabricks-3.0.19-py3-none-any.whl → fabricks-4.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/api/context.py +15 -3
- fabricks/api/notebooks/schedule.py +2 -3
- fabricks/api/parsers.py +2 -1
- fabricks/api/utils.py +3 -1
- fabricks/cdc/__init__.py +1 -2
- fabricks/cdc/base/__init__.py +1 -2
- fabricks/cdc/base/_types.py +5 -3
- fabricks/cdc/base/configurator.py +5 -0
- fabricks/cdc/base/generator.py +7 -3
- fabricks/cdc/base/merger.py +2 -0
- fabricks/cdc/base/processor.py +15 -0
- fabricks/cdc/templates/README.md +490 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
- fabricks/cdc/templates/queries/context.sql.jinja +104 -96
- fabricks/cdc/templates/query.sql.jinja +1 -1
- fabricks/context/__init__.py +13 -1
- fabricks/context/config.py +13 -122
- fabricks/context/log.py +92 -1
- fabricks/context/runtime.py +35 -69
- fabricks/context/spark_session.py +4 -4
- fabricks/context/utils.py +26 -39
- fabricks/core/__init__.py +2 -2
- fabricks/core/dags/base.py +5 -5
- fabricks/core/dags/processor.py +2 -3
- fabricks/core/extenders.py +1 -1
- fabricks/core/job_schema.py +26 -16
- fabricks/core/jobs/__init__.py +1 -7
- fabricks/core/jobs/base/README.md +1545 -0
- fabricks/core/jobs/base/__init__.py +1 -8
- fabricks/core/jobs/base/checker.py +7 -7
- fabricks/core/jobs/base/configurator.py +142 -63
- fabricks/core/jobs/base/generator.py +38 -34
- fabricks/core/jobs/base/invoker.py +48 -63
- fabricks/core/jobs/base/processor.py +13 -28
- fabricks/core/jobs/bronze.py +88 -38
- fabricks/core/jobs/get_job.py +3 -6
- fabricks/core/jobs/get_job_conf.py +19 -68
- fabricks/core/jobs/get_jobs.py +10 -11
- fabricks/core/jobs/get_schedules.py +3 -17
- fabricks/core/jobs/gold.py +89 -47
- fabricks/core/jobs/silver.py +42 -22
- fabricks/core/masks.py +11 -8
- fabricks/core/parsers/__init__.py +0 -2
- fabricks/core/parsers/base.py +10 -10
- fabricks/core/parsers/decorator.py +1 -1
- fabricks/core/parsers/get_parser.py +4 -5
- fabricks/core/schedules/process.py +1 -4
- fabricks/core/steps/base.py +27 -17
- fabricks/core/steps/get_step.py +2 -4
- fabricks/core/steps/get_step_conf.py +3 -7
- fabricks/core/udfs.py +7 -7
- fabricks/core/views.py +2 -2
- fabricks/deploy/__init__.py +27 -16
- fabricks/deploy/masks.py +1 -1
- fabricks/deploy/notebooks.py +19 -16
- fabricks/deploy/schedules.py +1 -1
- fabricks/deploy/tables.py +66 -49
- fabricks/deploy/udfs.py +2 -2
- fabricks/deploy/views.py +15 -16
- fabricks/metastore/database.py +3 -3
- fabricks/metastore/table.py +103 -68
- fabricks/models/__init__.py +125 -0
- fabricks/models/common.py +79 -0
- fabricks/models/config.py +225 -0
- fabricks/models/dependency.py +50 -0
- fabricks/models/job.py +157 -0
- fabricks/models/path.py +17 -0
- fabricks/models/runtime.py +182 -0
- fabricks/models/schedule.py +21 -0
- fabricks/models/step.py +103 -0
- fabricks/models/table.py +77 -0
- fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
- fabricks/utils/helpers.py +6 -5
- fabricks/utils/log.py +25 -6
- fabricks/utils/path.py +265 -108
- fabricks/utils/pip.py +7 -7
- fabricks/utils/read/read.py +23 -22
- fabricks/utils/read/read_yaml.py +2 -2
- fabricks/utils/write/delta.py +4 -4
- fabricks/utils/write/stream.py +2 -2
- {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/METADATA +9 -4
- {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/RECORD +86 -83
- fabricks/context/_types.py +0 -139
- fabricks/context/helpers.py +0 -63
- fabricks/core/jobs/base/_types.py +0 -284
- fabricks/core/parsers/_types.py +0 -6
- fabricks/utils/fdict.py +0 -240
- fabricks/utils/pydantic.py +0 -94
- fabricks/utils/schema/__init__.py +0 -7
- fabricks/utils/schema/get_json_schema_for_type.py +0 -161
- fabricks/utils/schema/get_schema_for_type.py +0 -99
- {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/WHEEL +0 -0
fabricks/models/runtime.py
ADDED
@@ -0,0 +1,182 @@

```python
"""Runtime configuration models."""

from typing import ClassVar

from pydantic import BaseModel, ConfigDict, Field, computed_field

from fabricks.models.common import Database, ExtenderOptions, SparkOptions
from fabricks.models.config import ConfigOptions
from fabricks.models.step import BronzeConf, GoldConf, PowerBI, SilverConf
from fabricks.utils.path import FileSharePath, GitPath, resolve_fileshare_path, resolve_git_path


class RuntimePathOptions(BaseModel):
    """Path configuration for runtime components."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    storage: str
    udfs: str
    parsers: str
    schedules: str
    views: str
    requirements: str
    storage_credential: str | None = None
    extenders: str | None = None
    masks: str | None = None


class UDFOptions(BaseModel):
    prefix: str | None = None
    schema_name: str | None = Field(None, alias="schema")


class MaskOptions(BaseModel):
    prefix: str | None = None
    schema_name: str | None = Field(None, alias="schema")


class RuntimeResolvedPathOptions(BaseModel):
    """Resolved path objects for runtime components."""

    model_config = ConfigDict(extra="forbid", frozen=True, arbitrary_types_allowed=True)

    storage: FileSharePath
    udfs: GitPath
    parsers: GitPath
    schedules: GitPath
    views: GitPath
    requirements: GitPath
    extenders: GitPath
    masks: GitPath

    storages: dict[str, FileSharePath]
    runtimes: dict[str, GitPath]


class RuntimeTimeoutOptions(BaseModel):
    """Timeout settings for runtime operations."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    step: int
    job: int
    pre_run: int
    post_run: int


class RuntimeOptions(BaseModel):
    """Main runtime configuration options."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    secret_scope: str
    encryption_key: str | None = None
    unity_catalog: bool | None = None
    type_widening: bool | None = None
    catalog: str | None = None
    workers: int
    timeouts: RuntimeTimeoutOptions
    retention_days: int
    timezone: str | None = None


class RuntimeConf(BaseModel):
    """Complete runtime configuration."""

    model_config = ConfigDict(extra="forbid", frozen=True, arbitrary_types_allowed=True)

    name: str
    options: RuntimeOptions
    path_options: RuntimePathOptions
    extender_options: ExtenderOptions | None = None
    spark_options: SparkOptions | None = None
    udf_options: UDFOptions | None = None
    mask_options: MaskOptions | None = None
    bronze: list[BronzeConf] | None = None
    silver: list[SilverConf] | None = None
    gold: list[GoldConf] | None = None
    powerbi: list[PowerBI] | None = None
    databases: list[Database] | None = None
    variables: dict[str, str] | None = None
    credentials: dict[str, str] | None = None

    config: ClassVar[ConfigOptions] = ConfigOptions()

    @computed_field
    @property
    def resolved_path_options(self) -> RuntimeResolvedPathOptions:
        """Get all runtime paths resolved as Path objects."""
        return self._resolve_paths()

    def _resolve_paths(self) -> RuntimeResolvedPathOptions:
        """
        Get all runtime paths resolved as Path objects.

        Args:
            runtime: The base runtime path (e.g., PATH_RUNTIME)

        Returns:
            RuntimeResolvedPathOptions with all paths resolved
        """
        # Collect all storage paths with variable substitution
        storage_paths: dict[str, FileSharePath] = {
            "fabricks": resolve_fileshare_path(self.path_options.storage, variables=self.variables),
        }

        # Add storage paths for bronze/silver/gold/databases
        for objects in [self.bronze, self.silver, self.gold, self.databases]:
            if objects:
                for obj in objects:
                    storage_paths[obj.name] = resolve_fileshare_path(
                        obj.path_options.storage,
                        variables=self.variables,
                    )

        root = self.config.resolved_paths.runtime

        # Collect all runtime paths with base path joining
        runtime_paths: dict[str, GitPath] = {}
        for objects in [self.bronze, self.silver, self.gold]:
            if objects:
                for obj in objects:
                    runtime_paths[obj.name] = resolve_git_path(
                        obj.path_options.runtime,
                        base=root,
                    )

        return RuntimeResolvedPathOptions(
            storage=storage_paths["fabricks"],
            udfs=resolve_git_path(
                path=self.path_options.udfs,
                base=root,
            ),
            parsers=resolve_git_path(
                path=self.path_options.parsers,
                base=root,
            ),
            schedules=resolve_git_path(
                path=self.path_options.schedules,
                base=root,
            ),
            views=resolve_git_path(
                path=self.path_options.views,
                base=root,
            ),
            requirements=resolve_git_path(
                path=self.path_options.requirements,
                base=root,
            ),
            extenders=resolve_git_path(
                path=self.path_options.extenders,
                base=root,
                default="fabricks/extenders",
            ),
            masks=resolve_git_path(
                path=self.path_options.masks,
                base=root,
                default="fabricks/masks",
            ),
            storages=storage_paths,
            runtimes=runtime_paths,
        )
```
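For orientation, here is a minimal sketch of how a runtime configuration dictionary (for example, one loaded from YAML) could be validated against the new `RuntimeConf` model. The field names come from the models above; the concrete values, storage URL, and YAML layout are illustrative assumptions, and the snippet assumes fabricks 4.0.1 is installed and importable in your environment.

```python
from fabricks.models.runtime import RuntimeConf

# Hypothetical payload; only the required fields of the models above are filled in.
payload = {
    "name": "dev",
    "options": {
        "secret_scope": "fabricks-secrets",
        "workers": 8,
        "retention_days": 7,
        "timeouts": {"step": 3600, "job": 1800, "pre_run": 600, "post_run": 600},
    },
    "path_options": {
        "storage": "abfss://fabricks@account.dfs.core.windows.net",
        "udfs": "fabricks/udfs",
        "parsers": "fabricks/parsers",
        "schedules": "fabricks/schedules",
        "views": "fabricks/views",
        "requirements": "requirements.txt",
    },
}

conf = RuntimeConf.model_validate(payload)  # extra="forbid" rejects unknown keys
print(conf.name, conf.options.workers, conf.options.timeouts.step)
```

Accessing `conf.resolved_path_options` (or calling `model_dump()`) would additionally run `_resolve_paths()` against the `ConfigOptions` defaults, so that is best done inside a configured fabricks environment.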
fabricks/models/schedule.py
ADDED
@@ -0,0 +1,21 @@

```python
from pydantic import BaseModel, ConfigDict


class ScheduleOptions(BaseModel):
    """Options for scheduling a notebook run."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    steps: list[str] | None = None
    tag: str | None = None
    view: str | None = None
    variables: dict[str, str | bool | int] | None = None


class Schedule(BaseModel):
    """Schedule model representing a notebook schedule."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    name: str
    options: ScheduleOptions
```
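As a quick illustration (not part of the diff), a single schedule entry could be validated like this; the schedule name and option values are invented for the example.

```python
from fabricks.models.schedule import Schedule

# Hypothetical entry, e.g. one item parsed from a schedules YAML file.
entry = {
    "name": "nightly",
    "options": {"steps": ["bronze", "silver", "gold"], "tag": "daily"},
}

schedule = Schedule.model_validate(entry)
assert schedule.options.steps == ["bronze", "silver", "gold"]
```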
fabricks/models/step.py
ADDED
@@ -0,0 +1,103 @@

```python
"""Step configuration models."""

from pydantic import BaseModel, ConfigDict

from fabricks.models.common import BaseInvokerOptions, ExtenderOptions, SparkOptions
from fabricks.models.table import StepTableOptions


class StepInvokerOptions(BaseModel):
    """Grouped invoker operations for pre/run/post execution."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    pre_run: list[BaseInvokerOptions] | None = None
    post_run: list[BaseInvokerOptions] | None = None


class StepTimeoutOptions(BaseModel):
    """Optional timeout overrides for individual steps."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    step: int | None = None
    job: int | None = None
    pre_run: int | None = None
    post_run: int | None = None


class StepPathOptions(BaseModel):
    """Path configuration for steps."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    runtime: str
    storage: str


class StepOptions(BaseModel):
    """Base step configuration options."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    order: int
    workers: int | None = None
    timeouts: StepTimeoutOptions | None = None


class BronzeOptions(StepOptions):
    """Bronze layer step options."""

    clean: bool | None = None


class SilverOptions(StepOptions):
    """Silver layer step options."""

    parent: str
    stream: bool | None = None
    local_checkpoint: bool | None = None


class GoldOptions(StepOptions):
    """Gold layer step options."""

    schema_drift: bool | None = None
    metadata: bool | None = None


class Step(BaseModel):
    """Base step configuration."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    name: str
    path_options: StepPathOptions
    table_options: StepTableOptions | None = None
    extender_options: list[ExtenderOptions] | None = None
    invoker_options: StepInvokerOptions | None = None
    spark_options: SparkOptions | None = None


class BronzeConf(Step):
    """Bronze layer step configuration."""

    options: BronzeOptions


class SilverConf(Step):
    """Silver layer step configuration."""

    options: SilverOptions


class GoldConf(Step):
    """Gold layer step configuration."""

    options: GoldOptions


class PowerBI(Step):
    """PowerBI configuration."""

    path_options: StepPathOptions | None = None
```
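To make the layering concrete, here is a hedged sketch of a silver step entry being validated against `SilverConf`; the step name, paths, and values are placeholders, not taken from the package.

```python
from fabricks.models.step import SilverConf

# Hypothetical step entry; "transf" and the paths are placeholders.
silver = SilverConf.model_validate(
    {
        "name": "transf",
        "path_options": {
            "runtime": "transf",
            "storage": "abfss://transf@account.dfs.core.windows.net",
        },
        "options": {"order": 2, "parent": "bronze", "stream": True},
    }
)

# Fields inherited from StepOptions (order, workers, timeouts) sit next to the
# silver-specific ones (parent, stream, local_checkpoint).
print(silver.options.order, silver.options.parent)
```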
fabricks/models/table.py
ADDED
@@ -0,0 +1,77 @@

```python
"""Table-related options and constraint models."""

from pydantic import BaseModel, ConfigDict

from fabricks.models.common import AllowedConstraintOptions, AllowedForeignKeyOptions


class ForeignKeyOptions(BaseModel):
    """Options for foreign key constraints."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    foreign_key: AllowedForeignKeyOptions | None = None
    constraint: AllowedConstraintOptions | None = None


class PrimaryKeyOptions(BaseModel):
    """Options for primary key constraints."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    constraint: AllowedConstraintOptions | None = None


class ForeignKey(BaseModel):
    """Foreign key constraint definition."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    keys: list[str]
    reference: str
    options: ForeignKeyOptions | None = None


class PrimaryKey(BaseModel):
    """Primary key constraint definition."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    keys: list[str]
    options: PrimaryKeyOptions | None = None


class TableOptions(BaseModel):
    """Comprehensive table configuration options for jobs."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    identity: bool | None = None
    liquid_clustering: bool | None = None
    partition_by: list[str] | None = None
    zorder_by: list[str] | None = None
    cluster_by: list[str] | None = None
    powerbi: bool | None = None
    maximum_compatibility: bool | None = None
    bloomfilter_by: list[str] | None = None
    constraints: dict[str, str | bool | int] | None = None
    properties: dict[str, str | bool | int] | None = None
    comment: str | None = None
    calculated_columns: dict[str, str | bool | int] | None = None
    masks: dict[str, str] | None = None
    comments: dict[str, str | bool | int] | None = None
    retention_days: int | None = None
    primary_key: dict[str, PrimaryKey] | None = None
    foreign_keys: dict[str, ForeignKey] | None = None


class StepTableOptions(BaseModel):
    """Simplified table options for step-level configuration."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    powerbi: bool | None = None
    liquid_clustering: bool | None = None
    properties: dict[str, str | bool | int] | None = None
    retention_days: int | None = None
    masks: dict[str, str] | None = None
```
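As a hedged illustration of how the nested constraint models compose, primary and foreign keys are keyed by constraint name and validated recursively; the column and table names below are invented.

```python
from fabricks.models.table import TableOptions

# Hypothetical job-level table options; names are for illustration only.
opts = TableOptions.model_validate(
    {
        "partition_by": ["date"],
        "primary_key": {"pk_customer": {"keys": ["customer_id"]}},
        "foreign_keys": {
            "fk_country": {"keys": ["country_id"], "reference": "gold.dim_country"}
        },
    }
)

print(opts.primary_key["pk_customer"].keys)
print(opts.foreign_keys["fk_country"].reference)
```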
fabricks/utils/helpers.py
CHANGED

```diff
@@ -8,7 +8,7 @@ from pyspark.sql import DataFrame
 from typing_extensions import deprecated

 from fabricks.utils._types import DataFrameLike
-from fabricks.utils.path import
+from fabricks.utils.path import GitPath
 from fabricks.utils.spark import spark


@@ -197,12 +197,12 @@ def run_in_parallel(
     return results


-def run_notebook(path:
+def run_notebook(path: GitPath, timeout: Optional[int] = None, **kwargs):
     """
     Runs a notebook located at the given path.

     Args:
-        path (
+        path (GitPath): The path to the notebook file.
         timeout (Optional[int]): The maximum execution time for the notebook in seconds. Defaults to None.
         **kwargs: Additional keyword arguments to be passed to the notebook.

@@ -229,10 +229,11 @@ def md5(s: Any) -> str:
     return md5.hexdigest()


-def load_module_from_path(name: str, path:
+def load_module_from_path(name: str, path: GitPath):
     from importlib.util import module_from_spec, spec_from_file_location

-    sys.path
+    if path.parent not in sys.path:
+        sys.path.insert(0, str(path.parent))

     spec = spec_from_file_location(name, path.string)
     assert spec, f"no valid module found in {path.string}"
```
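The `load_module_from_path` change guards `sys.path` so the module's parent directory is only prepended once across repeated loads. A standalone sketch of the same pattern, using `pathlib.Path` instead of fabricks' `GitPath` (whose full API is not shown in this diff):

```python
import sys
from importlib.util import module_from_spec, spec_from_file_location
from pathlib import Path


def load_module_from_path(name: str, path: Path):
    # Prepend the parent directory only if it is not already on sys.path,
    # so repeated loads do not keep growing the import path.
    parent = str(path.parent)
    if parent not in sys.path:
        sys.path.insert(0, parent)

    spec = spec_from_file_location(name, str(path))
    assert spec and spec.loader, f"no valid module found in {path}"

    module = module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
```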
fabricks/utils/log.py
CHANGED

```diff
@@ -60,7 +60,28 @@ class LogFormatter(logging.Formatter):
         extra = ""
         if hasattr(record, "exc_info") and record.exc_info:
             exc_info = record.__dict__.get("exc_info", None)
-            extra += f" [{self.COLORS[logging.ERROR]}{exc_info[0].__name__}{self.RESET}]"
+            extra += f" [{self.COLORS[logging.ERROR]}{exc_info[0].__name__}{self.RESET}]" # type: ignore
+
+        if hasattr(record, "df"):
+            df = record.__dict__.get("df")
+            if isinstance(df, DataFrame):
+                try:
+                    pandas_df = df.toPandas()
+                except Exception:
+                    # Handle timestamp precision/timezone issues by casting to string
+                    from pyspark.sql.functions import col
+                    from pyspark.sql.types import TimestampType
+
+                    for field in df.schema.fields:
+                        if isinstance(field.dataType, TimestampType):
+                            df = df.withColumn(field.name, col(field.name).cast("string"))
+                    pandas_df = df.toPandas()
+
+                extra += f"\n---\n%df\n{pandas_df.to_string(index=True)}\n---"
+
+        if hasattr(record, "json"):
+            json_data = record.__dict__.get("json")
+            extra += f"\n---\n{json.dumps(json_data, indent=2, default=str)}\n---"

         if self.debugmode:
             if hasattr(record, "sql"):
@@ -72,11 +93,6 @@ class LogFormatter(logging.Formatter):
             if hasattr(record, "context"):
                 extra += f"\n---\n{json.dumps(record.__dict__.get('context'), indent=2, default=str)}\n---"

-            if hasattr(record, "df"):
-                df = record.__dict__.get("df")
-                if isinstance(df, DataFrame):
-                    extra += f"\n---\n%df\n{df.toPandas().to_string(index=True)}\n---"
-
         record.levelname = levelname_formatted
         record.prefix = prefix
         record.timestamp = self.formatTime(record)
@@ -156,6 +172,9 @@ class AzureTableLogHandler(logging.Handler):
             }
             r["Exception"] = json.dumps(d)

+        if hasattr(record, "json"):
+            r["Data"] = json.dumps(record.__dict__.get("json", ""))
+
         if self.debugmode:
             if hasattr(record, "content"):
                 r["Content"] = json.dumps(record.__dict__.get("content", ""))[:1000]
```