fabricks 3.0.18__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/api/context.py +15 -3
- fabricks/api/notebooks/schedule.py +2 -3
- fabricks/api/parsers.py +2 -1
- fabricks/api/utils.py +3 -1
- fabricks/cdc/__init__.py +1 -2
- fabricks/cdc/base/__init__.py +1 -2
- fabricks/cdc/base/_types.py +5 -3
- fabricks/cdc/base/configurator.py +5 -0
- fabricks/cdc/base/generator.py +7 -3
- fabricks/cdc/base/merger.py +2 -0
- fabricks/cdc/base/processor.py +15 -0
- fabricks/cdc/templates/README.md +490 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
- fabricks/cdc/templates/queries/context.sql.jinja +104 -96
- fabricks/cdc/templates/query.sql.jinja +1 -1
- fabricks/context/__init__.py +13 -1
- fabricks/context/config.py +13 -122
- fabricks/context/log.py +92 -1
- fabricks/context/runtime.py +35 -69
- fabricks/context/spark_session.py +8 -7
- fabricks/context/utils.py +26 -39
- fabricks/core/__init__.py +2 -2
- fabricks/core/dags/base.py +5 -5
- fabricks/core/dags/processor.py +2 -3
- fabricks/core/extenders.py +1 -1
- fabricks/core/job_schema.py +26 -16
- fabricks/core/jobs/__init__.py +1 -7
- fabricks/core/jobs/base/README.md +1545 -0
- fabricks/core/jobs/base/__init__.py +1 -8
- fabricks/core/jobs/base/checker.py +7 -7
- fabricks/core/jobs/base/configurator.py +142 -63
- fabricks/core/jobs/base/generator.py +38 -34
- fabricks/core/jobs/base/invoker.py +48 -63
- fabricks/core/jobs/base/processor.py +13 -28
- fabricks/core/jobs/bronze.py +88 -38
- fabricks/core/jobs/get_job.py +3 -6
- fabricks/core/jobs/get_job_conf.py +19 -68
- fabricks/core/jobs/get_jobs.py +10 -11
- fabricks/core/jobs/get_schedules.py +3 -17
- fabricks/core/jobs/gold.py +96 -43
- fabricks/core/jobs/silver.py +42 -22
- fabricks/core/masks.py +11 -8
- fabricks/core/parsers/__init__.py +0 -2
- fabricks/core/parsers/base.py +10 -10
- fabricks/core/parsers/decorator.py +1 -1
- fabricks/core/parsers/get_parser.py +4 -5
- fabricks/core/schedules/process.py +1 -4
- fabricks/core/steps/base.py +27 -17
- fabricks/core/steps/get_step.py +2 -4
- fabricks/core/steps/get_step_conf.py +3 -7
- fabricks/core/udfs.py +9 -8
- fabricks/core/views.py +2 -2
- fabricks/deploy/__init__.py +27 -16
- fabricks/deploy/masks.py +1 -1
- fabricks/deploy/notebooks.py +19 -16
- fabricks/deploy/schedules.py +1 -1
- fabricks/deploy/tables.py +66 -49
- fabricks/deploy/udfs.py +2 -2
- fabricks/deploy/views.py +15 -16
- fabricks/metastore/database.py +3 -3
- fabricks/metastore/table.py +103 -68
- fabricks/models/__init__.py +125 -0
- fabricks/models/common.py +79 -0
- fabricks/models/config.py +225 -0
- fabricks/models/dependency.py +50 -0
- fabricks/models/job.py +157 -0
- fabricks/models/path.py +17 -0
- fabricks/models/runtime.py +182 -0
- fabricks/models/schedule.py +21 -0
- fabricks/models/step.py +103 -0
- fabricks/models/table.py +77 -0
- fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
- fabricks/utils/helpers.py +6 -5
- fabricks/utils/log.py +25 -6
- fabricks/utils/path.py +269 -102
- fabricks/utils/pip.py +7 -7
- fabricks/utils/read/read.py +23 -22
- fabricks/utils/read/read_yaml.py +2 -2
- fabricks/utils/write/delta.py +4 -4
- fabricks/utils/write/stream.py +2 -2
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/METADATA +9 -4
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/RECORD +86 -83
- fabricks/context/_types.py +0 -137
- fabricks/context/helpers.py +0 -63
- fabricks/core/jobs/base/_types.py +0 -284
- fabricks/core/parsers/_types.py +0 -6
- fabricks/utils/fdict.py +0 -240
- fabricks/utils/pydantic.py +0 -94
- fabricks/utils/schema/__init__.py +0 -7
- fabricks/utils/schema/get_json_schema_for_type.py +0 -161
- fabricks/utils/schema/get_schema_for_type.py +0 -99
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Common types and type aliases used across all models."""
|
|
2
|
+
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, ConfigDict
|
|
6
|
+
|
|
7
|
+
# --- Job modes -------------------------------------------------------------
# One alias per layer, plus a combined alias nesting the three of them.
AllowedModesBronze = Literal[
    "memory",
    "append",
    "register",
]
AllowedModesSilver = Literal[
    "memory",
    "append",
    "latest",
    "update",
    "combine",
]
AllowedModesGold = Literal[
    "memory",
    "append",
    "complete",
    "update",
    "invoke",
]
AllowedModes = Literal[
    AllowedModesBronze,
    AllowedModesSilver,
    AllowedModesGold,
]

# --- Files and operations --------------------------------------------------
AllowedFileFormats = Literal[
    "json_array",
    "json",
    "jsonl",
    "csv",
    "parquet",
    "delta",
]
AllowedOperations = Literal["upsert", "reload", "delete"]
AllowedTypes = Literal["manual", "default"]
AllowedOrigins = Literal["parser", "job"]

# --- Constraints -----------------------------------------------------------
AllowedConstraintOptions = Literal[
    "not enforced",
    "deferrable",
    "initially deferred",
    "norely",
    "rely",
]
AllowedForeignKeyOptions = Literal[
    "match full",
    "on update no action",
    "on delete no action",
]

# --- Change data capture ---------------------------------------------------
AllowedChangeDataCaptures = Literal["nocdc", "scd1", "scd2", "none"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SparkOptions(BaseModel):
    """Optional Spark settings, split into a ``sql`` and a ``conf`` mapping.

    Both mappings are keyed by string and hold string, boolean or integer
    values; each defaults to ``None``.
    """

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    sql: dict[str, str | bool | int] | None = None
    conf: dict[str, str | bool | int] | None = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class BaseInvokerOptions(BaseModel):
    """Description of a single notebook invocation.

    Every field is optional and defaults to ``None``.
    """

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    notebook: str | None = None
    timeout: int | None = None
    arguments: dict[str, str | bool | int] | None = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class InvokerOptions(BaseModel):
    """Notebook invocations grouped into ``pre_run``, ``run`` and ``post_run``.

    Each group is an optional list of :class:`BaseInvokerOptions`.
    """

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    pre_run: list[BaseInvokerOptions] | None = None
    run: list[BaseInvokerOptions] | None = None
    post_run: list[BaseInvokerOptions] | None = None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ExtenderOptions(BaseModel):
    """Reference to an extender plus its optional string arguments."""

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    # Required: the only field of this model without a default.
    extender: str
    arguments: dict[str, str] | None = None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class DatabasePathOptions(BaseModel):
    """Path settings of a database; currently a single required ``storage``."""

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    storage: str
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class Database(BaseModel):
    """A named database together with its :class:`DatabasePathOptions`.

    Both fields are required.
    """

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    name: str
    path_options: DatabasePathOptions
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import pathlib
|
|
4
|
+
from pathlib import Path as PathLibPath
|
|
5
|
+
|
|
6
|
+
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, computed_field, field_validator
|
|
7
|
+
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict
|
|
8
|
+
|
|
9
|
+
from fabricks.utils.path import GitPath, resolve_git_path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class HierarchicalFileSettingsSource(PydanticBaseSettingsSource):
    """Settings source that loads configuration from a file found by walking up the directory tree.

    Starting at the current working directory and moving towards the
    filesystem root, each directory is probed for ``fabricksconfig.json``
    first and ``pyproject.toml`` (table ``[tool.fabricks]``) second.  The
    first hit wins, and the directory it was found in is stored under the
    ``"base"`` key of the returned mapping, replacing any ``"base"`` value
    the file itself may contain.
    """

    def get_field_value(self, field, field_name=None):
        """Return an empty lookup result; values are produced wholesale by ``__call__``.

        ``field_name`` is accepted (and ignored) so the method can also be
        called with the two-argument form used by the pydantic-settings base
        class.
        """
        return None, None, False

    def __call__(self):
        """Load settings from the hierarchical file search."""
        return self._load_hierarchical_file()

    @staticmethod
    def _read_pyproject_settings(base: PathLibPath):
        """Return the ``[tool.fabricks]`` table of ``base/pyproject.toml``, or ``None`` if the file is absent."""
        pyproject_path = base / "pyproject.toml"
        if not pyproject_path.exists():
            return None

        import sys

        # tomllib joined the standard library in 3.11; older interpreters
        # fall back to the API-compatible ``tomli`` backport.
        if sys.version_info >= (3, 11):
            import tomllib
        else:
            import tomli as tomllib  # type: ignore

        with open(pyproject_path, "rb") as f:
            document = tomllib.load(f)

        data = document.get("tool", {}).get("fabricks", {})
        # NOTE(review): "base" is always set, so the result is truthy even when
        # the file has no [tool.fabricks] table -- any pyproject.toml therefore
        # ends the upward search.  Confirm this is intended.
        data["base"] = str(base)
        return data

    @staticmethod
    def _read_json_settings(base: PathLibPath):
        """Return the content of ``base/fabricksconfig.json``, or ``None`` if the file is absent."""
        json_path = base / "fabricksconfig.json"
        if not json_path.exists():
            return None

        import json

        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding, which is not UTF-8 everywhere.
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        data["base"] = str(base)
        return data

    def _load_hierarchical_file(self):
        """Search up the directory hierarchy for a configuration file.

        Returns:
            The settings mapping of the first configuration file found, or an
            empty dict when the filesystem root is reached without a hit.
        """
        start = pathlib.Path(os.getcwd())

        # ``start.parents`` ends at the filesystem root, so this visits the
        # same directories as an explicit "climb until path == path.parent".
        for directory in (start, *start.parents):
            data = self._read_json_settings(directory)
            if not data:
                data = self._read_pyproject_settings(directory)
            if data:
                return data

        return {}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class ResolvedPathOptions(BaseModel):
    """The four main configuration paths, each held as a ``GitPath``."""

    # ``arbitrary_types_allowed`` lets pydantic accept the GitPath fields
    # below; instances are immutable and unknown keys are rejected.
    model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True, extra="forbid")

    base: GitPath
    config: GitPath
    runtime: GitPath
    notebooks: GitPath
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class ConfigOptions(BaseSettings):
    """Main configuration options for the Fabricks framework.

    Every field can be supplied through its ``FABRICKS_*`` environment
    variable or through the lower-case key of the same name.  The sources
    consulted, and their priority, are defined by
    ``settings_customise_sources``.
    """

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    # Locations.  The literal string "none" is the "not configured" default.
    base: str = Field(
        validation_alias=AliasChoices("FABRICKS_BASE", "base"),
        default="none",
    )
    config: str = Field(
        validation_alias=AliasChoices("FABRICKS_CONFIG", "config"),
        default="none",
    )
    runtime: str = Field(
        validation_alias=AliasChoices("FABRICKS_RUNTIME", "runtime"),
        default="none",
    )
    notebooks: str = Field(
        validation_alias=AliasChoices("FABRICKS_NOTEBOOKS", "notebooks"),
        default="none",
    )

    # Feature switches, all off by default.
    job_config_from_yaml: bool = Field(
        validation_alias=AliasChoices("FABRICKS_IS_JOB_CONFIG_FROM_YAML", "job_config_from_yaml"),
        default=False,
    )
    debugmode: bool = Field(
        validation_alias=AliasChoices("FABRICKS_IS_DEBUGMODE", "debugmode"),
        default=False,
    )
    funmode: bool = Field(
        validation_alias=AliasChoices("FABRICKS_IS_FUNMODE", "funmode"),
        default=False,
    )
    devmode: bool = Field(
        validation_alias=AliasChoices("FABRICKS_IS_DEVMODE", "devmode"),
        default=False,
    )

    # Numeric ``logging`` level; 20 is ``logging.INFO``.
    loglevel: int = Field(
        validation_alias=AliasChoices("FABRICKS_LOGLEVEL", "loglevel"),
        default=20,
    )

    @field_validator("job_config_from_yaml", "debugmode", "funmode", "devmode", mode="before")
    @classmethod
    def validate_bool(cls, v):
        """
        Convert common string representations of boolean values to bool.

        Accepted string values (case-insensitive, surrounding whitespace ignored) are:
        - "true", "1", "yes" -> True
        - "false", "0", "no" -> False

        Non-string inputs or strings not matching the above values are returned unchanged.
        """
        if isinstance(v, bool):
            return v

        if isinstance(v, str):
            token = v.strip().lower()
            if token in ("true", "1", "yes"):
                return True
            if token in ("false", "0", "no"):
                return False

        return v

    @field_validator("loglevel", mode="before")
    @classmethod
    def validate_loglevel(cls, v):
        """
        Normalise a log level to its numeric ``logging`` value.

        String inputs are matched case-insensitively against the standard
        level names ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL").
        Numeric strings such as "10" -- the form a level takes when it comes
        from an environment variable -- are converted to ``int``.  Any other
        string falls back to ``logging.INFO``.

        Non-string inputs are returned unchanged.
        """
        if not isinstance(v, str):
            return v

        levels = {
            "DEBUG": logging.DEBUG,
            "INFO": logging.INFO,
            "WARNING": logging.WARNING,
            "ERROR": logging.ERROR,
            "CRITICAL": logging.CRITICAL,
        }
        token = v.strip().upper()
        if token in levels:
            return levels[token]

        # Environment values always arrive as strings, so a numeric level
        # must be parsed here rather than silently replaced by INFO.
        try:
            return int(token)
        except ValueError:
            return logging.INFO  # Default log level

    @field_validator("notebooks", mode="before")
    @classmethod
    def validate_notebooks(cls, v):
        """Replace an empty value or the "none" placeholder with "runtime/notebooks"."""
        if not v or v == "none":
            return "runtime/notebooks"

        return v

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ):
        """
        Define which settings sources are used and in which priority.

        Order (highest priority first): init arguments > environment
        variables > ``.env`` file > hierarchical configuration file >
        secrets directory > field defaults.
        """
        # ``dotenv_settings`` must be returned here for ``env_file=".env"`` in
        # ``model_config`` to have any effect: only the sources in this tuple
        # are consulted.
        return (
            init_settings,
            env_settings,
            dotenv_settings,
            HierarchicalFileSettingsSource(settings_cls),
            file_secret_settings,
        )

    def _resolve_paths(self) -> ResolvedPathOptions:
        """
        Resolve the configured path strings into ``GitPath`` objects.

        ``base`` is resolved on its own; ``config``, ``runtime`` and
        ``notebooks`` are resolved with ``GitPath(self.base)`` passed as
        their ``base``.

        Returns:
            ResolvedPathOptions with all four paths resolved
        """
        root = GitPath(self.base)

        return ResolvedPathOptions(
            base=resolve_git_path(path=self.base),
            config=resolve_git_path(path=self.config, base=root),
            runtime=resolve_git_path(path=self.runtime, base=root),
            notebooks=resolve_git_path(path=self.notebooks, base=root),
        )

    @computed_field  # type: ignore[misc]
    @property
    def resolved_paths(self) -> ResolvedPathOptions:
        """Get all paths resolved as ``GitPath`` objects (recomputed on each access)."""
        return self._resolve_paths()
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Job dependency tracking models."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, ConfigDict, model_validator
|
|
4
|
+
from pyspark.sql.types import StringType, StructField, StructType
|
|
5
|
+
|
|
6
|
+
from fabricks.models.common import AllowedOrigins
|
|
7
|
+
from fabricks.models.utils import get_dependency_id, get_job_id
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class JobDependency(BaseModel):
    """Immutable record describing that a job depends on a parent.

    Validation rejects a record whose ``job_id`` equals its ``parent_id``,
    i.e. a job that depends on itself.
    """

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    origin: AllowedOrigins
    job_id: str
    parent: str
    parent_id: str
    dependency_id: str

    def __str__(self) -> str:
        return f"{self.job_id} -> {self.parent}"

    @model_validator(mode="after")
    def check_no_circular_dependency(self):
        """Raise ``ValueError`` when the job and its parent share the same id."""
        if self.job_id != self.parent_id:
            return self
        raise ValueError("Circular dependency detected")

    @staticmethod
    def from_parts(job_id: str, parent: str, origin: AllowedOrigins):
        """Build a dependency from a job id and a parent name.

        A trailing ``"__current"`` is stripped from ``parent`` before the
        parent id and the dependency id are derived from it.
        """
        parent_name = parent.removesuffix("__current")
        parent_job_id = get_job_id(job=parent_name)
        dep_id = get_dependency_id(parent=parent_name, job_id=job_id)

        return JobDependency(
            origin=origin,
            job_id=job_id,
            parent=parent_name,
            parent_id=parent_job_id,
            dependency_id=dep_id,
        )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Spark schema for dependency rows: five nullable string columns, in this order.
SchemaDependencies = StructType(
    [
        StructField(column, StringType(), True)
        for column in ("dependency_id", "origin", "job_id", "parent_id", "parent")
    ]
)
|
fabricks/models/job.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Job configuration models."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, ConfigDict, Field, computed_field
|
|
4
|
+
|
|
5
|
+
from fabricks.models.common import (
|
|
6
|
+
AllowedChangeDataCaptures,
|
|
7
|
+
AllowedModes,
|
|
8
|
+
AllowedModesBronze,
|
|
9
|
+
AllowedModesGold,
|
|
10
|
+
AllowedModesSilver,
|
|
11
|
+
AllowedOperations,
|
|
12
|
+
AllowedTypes,
|
|
13
|
+
ExtenderOptions,
|
|
14
|
+
InvokerOptions,
|
|
15
|
+
SparkOptions,
|
|
16
|
+
)
|
|
17
|
+
from fabricks.models.table import TableOptions
|
|
18
|
+
from fabricks.models.utils import get_job_id
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CheckOptions(BaseModel):
    """Data-quality check settings for a job.

    Every field is optional and defaults to ``None``.
    """

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    # Switches
    skip: bool | None = None
    pre_run: bool | None = None
    post_run: bool | None = None

    # Row-count settings
    min_rows: int | None = None
    max_rows: int | None = None
    count_must_equal: str | None = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ParserOptions(BaseModel):
    """Parser settings: an optional file format and optional read options."""

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    file_format: str | None = None
    read_options: dict[str, str] | None = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class BaseOptions(BaseModel):
    """Options shared by the job option models of every layer.

    ``mode`` is the only required field.  ``change_data_capture`` defaults to
    the string ``"none"``; every other field defaults to ``None``.
    """

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    mode: AllowedModes
    change_data_capture: AllowedChangeDataCaptures | None = Field(default="none")

    parents: list[str] | None = None
    optimize: bool | None = None
    compute_statistics: bool | None = None
    vacuum: bool | None = None
    no_drop: bool | None = None
    timeout: int | None = None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class BronzeOptions(BaseOptions):
    """Job options for the bronze layer.

    Narrows ``mode`` to the bronze modes and adds the required ``uri``; all
    other fields declared here default to ``None``.
    """

    # Same settings as the parent class, restated explicitly.
    model_config = ConfigDict(frozen=True, extra="forbid")

    mode: AllowedModesBronze
    type: AllowedTypes | None = None

    uri: str
    keys: list[str] | None = None

    parser: str | None = None
    source: str | None = None
    filter_where: str | None = None
    encrypted_columns: list[str] | None = None
    calculated_columns: dict[str, str] | None = None
    operation: AllowedOperations | None = None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class SilverOptions(BaseOptions):
    """Job options for the silver layer.

    Narrows ``mode`` to the silver modes; all other fields declared here
    default to ``None``.
    """

    # Same settings as the parent class, restated explicitly.
    model_config = ConfigDict(frozen=True, extra="forbid")

    mode: AllowedModesSilver
    type: AllowedTypes | None = None

    filter_where: str | None = None
    deduplicate: bool | None = None
    stream: bool | None = None
    order_duplicate_by: dict[str, str] | None = None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class GoldOptions(BaseOptions):
    """Job options for the gold layer.

    Narrows ``mode`` to the gold modes; all other fields declared here
    default to ``None``.
    """

    # Same settings as the parent class, restated explicitly.
    model_config = ConfigDict(frozen=True, extra="forbid")

    mode: AllowedModesGold
    type: AllowedTypes | None = None

    update_where: str | None = None
    deduplicate: bool | None = None
    rectify_as_upserts: bool | None = None
    correct_valid_from: bool | None = None
    persist_last_timestamp: bool | None = None
    persist_last_updated_timestamp: bool | None = None
    table: str | None = None
    notebook: bool | None = None
    requirements: bool | None = None
    metadata: bool | None = None
    last_updated: bool | None = None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Union of the three layer-specific option models.
TOptions = BronzeOptions | SilverOptions | GoldOptions
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class JobConfBase(BaseModel):
    """Configuration common to every job.

    A job is identified by ``step``, ``topic`` and ``item``; ``job_id`` is
    derived from those three values.  ``options`` is required, the remaining
    option groups default to ``None``.
    """

    # Immutable instances; unknown keys are rejected at validation time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    # Identity
    step: str
    topic: str
    item: str

    # Option groups
    options: TOptions
    table_options: TableOptions | None = None
    check_options: CheckOptions | None = None
    spark_options: SparkOptions | None = None
    invoker_options: InvokerOptions | None = None
    extender_options: list[ExtenderOptions] | None = None

    # Free-form annotations
    tags: list[str] | None = None
    comment: str | None = None

    @computed_field  # type: ignore[misc]
    @property
    def job_id(self) -> str:
        """Job id computed from ``step``, ``topic`` and ``item``."""
        return get_job_id(step=self.step, topic=self.topic, item=self.item)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class JobConfBronze(JobConfBase):
    """Job configuration whose ``options`` are :class:`BronzeOptions`.

    Also carries the optional ``parser_options``.
    """

    options: BronzeOptions
    parser_options: ParserOptions | None = None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class JobConfSilver(JobConfBase):
    """Job configuration whose ``options`` are :class:`SilverOptions`."""

    options: SilverOptions
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class JobConfGold(JobConfBase):
    """Job configuration whose ``options`` are :class:`GoldOptions`."""

    options: GoldOptions
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# Union of the three layer-specific job configuration models.
JobConf = JobConfBronze | JobConfSilver | JobConfGold
|
fabricks/models/path.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Path configuration models."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from fabricks.utils.path import FileSharePath, GitPath
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
class Paths:
    """Immutable bundle of runtime path references.

    Holds five file-share paths and one git path.  The annotations are
    written as strings, so they are not evaluated when the class is created.
    """

    # File-share locations
    to_storage: "FileSharePath"
    to_tmp: "FileSharePath"
    to_checkpoints: "FileSharePath"
    to_commits: "FileSharePath"
    to_schema: "FileSharePath"

    # Git location
    to_runtime: "GitPath"
|