fabricks 3.0.18__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- fabricks/api/context.py +15 -3
- fabricks/api/notebooks/schedule.py +2 -3
- fabricks/api/parsers.py +2 -1
- fabricks/api/utils.py +3 -1
- fabricks/cdc/__init__.py +1 -2
- fabricks/cdc/base/__init__.py +1 -2
- fabricks/cdc/base/_types.py +5 -3
- fabricks/cdc/base/configurator.py +5 -0
- fabricks/cdc/base/generator.py +7 -3
- fabricks/cdc/base/merger.py +2 -0
- fabricks/cdc/base/processor.py +15 -0
- fabricks/cdc/templates/README.md +490 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
- fabricks/cdc/templates/queries/context.sql.jinja +104 -96
- fabricks/cdc/templates/query.sql.jinja +1 -1
- fabricks/context/__init__.py +13 -1
- fabricks/context/config.py +13 -122
- fabricks/context/log.py +92 -1
- fabricks/context/runtime.py +35 -69
- fabricks/context/spark_session.py +8 -7
- fabricks/context/utils.py +26 -39
- fabricks/core/__init__.py +2 -2
- fabricks/core/dags/base.py +5 -5
- fabricks/core/dags/processor.py +2 -3
- fabricks/core/extenders.py +1 -1
- fabricks/core/job_schema.py +26 -16
- fabricks/core/jobs/__init__.py +1 -7
- fabricks/core/jobs/base/README.md +1545 -0
- fabricks/core/jobs/base/__init__.py +1 -8
- fabricks/core/jobs/base/checker.py +7 -7
- fabricks/core/jobs/base/configurator.py +142 -63
- fabricks/core/jobs/base/generator.py +38 -34
- fabricks/core/jobs/base/invoker.py +48 -63
- fabricks/core/jobs/base/processor.py +13 -28
- fabricks/core/jobs/bronze.py +88 -38
- fabricks/core/jobs/get_job.py +3 -6
- fabricks/core/jobs/get_job_conf.py +19 -68
- fabricks/core/jobs/get_jobs.py +10 -11
- fabricks/core/jobs/get_schedules.py +3 -17
- fabricks/core/jobs/gold.py +96 -43
- fabricks/core/jobs/silver.py +42 -22
- fabricks/core/masks.py +11 -8
- fabricks/core/parsers/__init__.py +0 -2
- fabricks/core/parsers/base.py +10 -10
- fabricks/core/parsers/decorator.py +1 -1
- fabricks/core/parsers/get_parser.py +4 -5
- fabricks/core/schedules/process.py +1 -4
- fabricks/core/steps/base.py +27 -17
- fabricks/core/steps/get_step.py +2 -4
- fabricks/core/steps/get_step_conf.py +3 -7
- fabricks/core/udfs.py +9 -8
- fabricks/core/views.py +2 -2
- fabricks/deploy/__init__.py +27 -16
- fabricks/deploy/masks.py +1 -1
- fabricks/deploy/notebooks.py +19 -16
- fabricks/deploy/schedules.py +1 -1
- fabricks/deploy/tables.py +66 -49
- fabricks/deploy/udfs.py +2 -2
- fabricks/deploy/views.py +15 -16
- fabricks/metastore/database.py +3 -3
- fabricks/metastore/table.py +103 -68
- fabricks/models/__init__.py +125 -0
- fabricks/models/common.py +79 -0
- fabricks/models/config.py +225 -0
- fabricks/models/dependency.py +50 -0
- fabricks/models/job.py +157 -0
- fabricks/models/path.py +17 -0
- fabricks/models/runtime.py +182 -0
- fabricks/models/schedule.py +21 -0
- fabricks/models/step.py +103 -0
- fabricks/models/table.py +77 -0
- fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
- fabricks/utils/helpers.py +6 -5
- fabricks/utils/log.py +25 -6
- fabricks/utils/path.py +269 -102
- fabricks/utils/pip.py +7 -7
- fabricks/utils/read/read.py +23 -22
- fabricks/utils/read/read_yaml.py +2 -2
- fabricks/utils/write/delta.py +4 -4
- fabricks/utils/write/stream.py +2 -2
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/METADATA +9 -4
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/RECORD +86 -83
- fabricks/context/_types.py +0 -137
- fabricks/context/helpers.py +0 -63
- fabricks/core/jobs/base/_types.py +0 -284
- fabricks/core/parsers/_types.py +0 -6
- fabricks/utils/fdict.py +0 -240
- fabricks/utils/pydantic.py +0 -94
- fabricks/utils/schema/__init__.py +0 -7
- fabricks/utils/schema/get_json_schema_for_type.py +0 -161
- fabricks/utils/schema/get_schema_for_type.py +0 -99
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/WHEEL +0 -0
fabricks/context/runtime.py
CHANGED
|
@@ -2,92 +2,58 @@ from typing import Final, Optional
|
|
|
2
2
|
|
|
3
3
|
import yaml
|
|
4
4
|
|
|
5
|
-
from fabricks.context.config import PATH_CONFIG
|
|
6
|
-
from fabricks.
|
|
7
|
-
from fabricks.utils.path import
|
|
5
|
+
from fabricks.context.config import PATH_CONFIG
|
|
6
|
+
from fabricks.models import Database, RuntimeConf, StepBronzeConf, StepGoldConf, StepSilverConf
|
|
7
|
+
from fabricks.utils.path import FileSharePath, GitPath
|
|
8
8
|
|
|
9
9
|
with open(str(PATH_CONFIG)) as f:
|
|
10
10
|
data = yaml.safe_load(f)
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
assert
|
|
14
|
-
CONF_RUNTIME: Final[
|
|
12
|
+
conf_data = [d["conf"] for d in data][0]
|
|
13
|
+
assert conf_data, "conf mandatory"
|
|
14
|
+
CONF_RUNTIME: Final[RuntimeConf] = RuntimeConf.model_validate(conf_data)
|
|
15
15
|
|
|
16
|
-
BRONZE = CONF_RUNTIME.
|
|
17
|
-
SILVER = CONF_RUNTIME.
|
|
18
|
-
GOLD = CONF_RUNTIME.
|
|
16
|
+
BRONZE: list[StepBronzeConf] = CONF_RUNTIME.bronze or []
|
|
17
|
+
SILVER: list[StepSilverConf] = CONF_RUNTIME.silver or []
|
|
18
|
+
GOLD: list[StepGoldConf] = CONF_RUNTIME.gold or []
|
|
19
19
|
STEPS = BRONZE + SILVER + GOLD
|
|
20
20
|
|
|
21
|
-
databases = CONF_RUNTIME.
|
|
22
|
-
credentials = CONF_RUNTIME.
|
|
23
|
-
variables = CONF_RUNTIME.
|
|
21
|
+
databases: list[Database] = CONF_RUNTIME.databases or []
|
|
22
|
+
credentials = CONF_RUNTIME.credentials or []
|
|
23
|
+
variables = CONF_RUNTIME.variables or {}
|
|
24
24
|
VARIABLES: dict = variables
|
|
25
25
|
|
|
26
|
-
conf_options = CONF_RUNTIME.get("options", {})
|
|
27
|
-
assert conf_options, "options mandatory"
|
|
28
26
|
|
|
29
|
-
IS_UNITY_CATALOG: Final[bool] =
|
|
30
|
-
CATALOG: Optional[str] =
|
|
27
|
+
IS_UNITY_CATALOG: Final[bool] = CONF_RUNTIME.options.unity_catalog or False
|
|
28
|
+
CATALOG: Optional[str] = CONF_RUNTIME.options.catalog
|
|
31
29
|
|
|
32
30
|
if IS_UNITY_CATALOG and not CATALOG:
|
|
33
|
-
raise ValueError("catalog mandatory in options
|
|
31
|
+
raise ValueError("catalog mandatory in options if unity catalog is enabled")
|
|
34
32
|
|
|
35
|
-
|
|
36
|
-
assert secret_scope, "secret_scope mandatory in options"
|
|
37
|
-
SECRET_SCOPE: Final[str] = secret_scope
|
|
33
|
+
SECRET_SCOPE: Final[str] = CONF_RUNTIME.options.secret_scope
|
|
38
34
|
|
|
39
|
-
|
|
40
|
-
TIMEZONE: Final[str] = timezone
|
|
35
|
+
TIMEZONE: Final[Optional[str]] = CONF_RUNTIME.options.timezone
|
|
41
36
|
|
|
42
|
-
IS_TYPE_WIDENING: Final[bool] =
|
|
37
|
+
IS_TYPE_WIDENING: Final[bool] = CONF_RUNTIME.options.type_widening or False
|
|
43
38
|
|
|
44
|
-
|
|
45
|
-
|
|
39
|
+
# Resolve all paths at once
|
|
40
|
+
PATHS_RESOLVED = CONF_RUNTIME.resolved_path_options
|
|
46
41
|
|
|
47
|
-
|
|
48
|
-
assert fabricks_uri, "storage mandatory in path options"
|
|
49
|
-
FABRICKS_STORAGE: Final[Path] = Path.from_uri(fabricks_uri, regex=variables)
|
|
42
|
+
FABRICKS_STORAGE: Final[FileSharePath] = PATHS_RESOLVED.storage
|
|
50
43
|
|
|
51
|
-
FABRICKS_STORAGE_CREDENTIAL: Final[Optional[str]] = path_options.
|
|
44
|
+
FABRICKS_STORAGE_CREDENTIAL: Final[Optional[str]] = CONF_RUNTIME.path_options.storage_credential
|
|
52
45
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
46
|
+
PATH_UDFS: Final[GitPath] = PATHS_RESOLVED.udfs
|
|
47
|
+
PATH_PARSERS: Final[GitPath] = PATHS_RESOLVED.parsers
|
|
48
|
+
PATH_EXTENDERS: Final[GitPath] = PATHS_RESOLVED.extenders
|
|
49
|
+
PATH_VIEWS: Final[GitPath] = PATHS_RESOLVED.views
|
|
50
|
+
PATH_SCHEDULES: Final[GitPath] = PATHS_RESOLVED.schedules
|
|
51
|
+
PATH_REQUIREMENTS: Final[GitPath] = PATHS_RESOLVED.requirements
|
|
52
|
+
PATH_MASKS: Final[GitPath] = PATHS_RESOLVED.masks
|
|
53
|
+
PATHS_STORAGE: Final[dict[str, FileSharePath]] = PATHS_RESOLVED.storages
|
|
54
|
+
PATHS_RUNTIME: Final[dict[str, GitPath]] = PATHS_RESOLVED.runtimes
|
|
56
55
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
path_extenders = path_options.get("extenders", "fabricks/extenders")
|
|
62
|
-
assert path_extenders, "path to extenders mandatory"
|
|
63
|
-
PATH_EXTENDERS: Final[Path] = PATH_RUNTIME.joinpath(path_extenders)
|
|
64
|
-
|
|
65
|
-
path_views = path_options.get("views", "fabricks/views")
|
|
66
|
-
assert path_views, "path to views mandatory"
|
|
67
|
-
PATH_VIEWS: Final[Path] = PATH_RUNTIME.joinpath(path_views)
|
|
68
|
-
|
|
69
|
-
path_schedules = path_options.get("schedules", "fabricks/schedules")
|
|
70
|
-
assert path_schedules, "path to schedules mandatory"
|
|
71
|
-
PATH_SCHEDULES: Final[Path] = PATH_RUNTIME.joinpath(path_schedules)
|
|
72
|
-
|
|
73
|
-
path_requirements = path_options.get("requirements", "fabricks/requirements")
|
|
74
|
-
assert path_requirements, "path to requirements mandatory"
|
|
75
|
-
PATH_REQUIREMENTS: Final[Path] = PATH_RUNTIME.joinpath(path_requirements)
|
|
76
|
-
|
|
77
|
-
path_masks = path_options.get("masks", "fabricks/masks")
|
|
78
|
-
assert path_masks, "path to masks mandatory"
|
|
79
|
-
PATH_MASKS: Final[Path] = PATH_RUNTIME.joinpath(path_masks)
|
|
80
|
-
|
|
81
|
-
PATHS_STORAGE: Final[dict[str, Path]] = {
|
|
82
|
-
"fabricks": FABRICKS_STORAGE,
|
|
83
|
-
**get_storage_paths(BRONZE, variables),
|
|
84
|
-
**get_storage_paths(SILVER, variables),
|
|
85
|
-
**get_storage_paths(GOLD, variables),
|
|
86
|
-
**get_storage_paths(databases, variables),
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
PATHS_RUNTIME: Final[dict[str, Path]] = {
|
|
90
|
-
**get_runtime_path(BRONZE, PATH_RUNTIME),
|
|
91
|
-
**get_runtime_path(SILVER, PATH_RUNTIME),
|
|
92
|
-
**get_runtime_path(GOLD, PATH_RUNTIME),
|
|
93
|
-
}
|
|
56
|
+
Bronzes = [b.name for b in BRONZE]
|
|
57
|
+
Silvers = [s.name for s in SILVER]
|
|
58
|
+
Golds = [g.name for g in GOLD]
|
|
59
|
+
Steps = Bronzes + Silvers + Golds
|
|
@@ -20,10 +20,11 @@ def add_credentials_to_spark(spark: Optional[SparkSession] = None):
|
|
|
20
20
|
if spark is None:
|
|
21
21
|
spark = get_spark()
|
|
22
22
|
|
|
23
|
-
credentials = CONF_RUNTIME.
|
|
24
|
-
for
|
|
25
|
-
|
|
26
|
-
|
|
23
|
+
credentials = CONF_RUNTIME.credentials or []
|
|
24
|
+
for cred in credentials:
|
|
25
|
+
for uri, secret in cred.items():
|
|
26
|
+
s = get_secret_from_secret_scope(secret_scope=SECRET_SCOPE, name=secret)
|
|
27
|
+
add_secret_to_spark(secret=s, uri=uri, spark=spark)
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
def add_spark_options_to_spark(spark: Optional[SparkSession] = None):
|
|
@@ -35,13 +36,13 @@ def add_spark_options_to_spark(spark: Optional[SparkSession] = None):
|
|
|
35
36
|
spark.sql("set spark.databricks.delta.resolveMergeUpdateStructsByName.enabled = True;")
|
|
36
37
|
|
|
37
38
|
# runtime options
|
|
38
|
-
spark_options = CONF_RUNTIME.
|
|
39
|
+
spark_options = CONF_RUNTIME.spark_options
|
|
39
40
|
if spark_options:
|
|
40
|
-
sql_options = spark_options.
|
|
41
|
+
sql_options = spark_options.sql or {}
|
|
41
42
|
for key, value in sql_options.items():
|
|
42
43
|
spark.sql(f"set {key} = {value};")
|
|
43
44
|
|
|
44
|
-
conf_options = spark_options.
|
|
45
|
+
conf_options = spark_options.conf or {}
|
|
45
46
|
for key, value in conf_options.items():
|
|
46
47
|
spark.conf.set(key, value)
|
|
47
48
|
|
fabricks/context/utils.py
CHANGED
|
@@ -4,54 +4,53 @@ import fabricks.context.config as c
|
|
|
4
4
|
import fabricks.context.runtime as r
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
def pprint_runtime(extended: bool =
|
|
7
|
+
def pprint_runtime(extended: bool = True) -> None:
|
|
8
8
|
print("=" * 60)
|
|
9
9
|
print("FABRICKS RUNTIME CONFIGURATION")
|
|
10
10
|
print("=" * 60)
|
|
11
11
|
|
|
12
12
|
# Core Paths Section
|
|
13
|
-
print("\n📁
|
|
14
|
-
print(f"
|
|
15
|
-
print(f"
|
|
16
|
-
print(f"
|
|
17
|
-
print(f"
|
|
18
|
-
print(f"
|
|
19
|
-
print(f"
|
|
13
|
+
print("\n📁 CONFIG:")
|
|
14
|
+
print(f" • Runtime: {c.PATH_RUNTIME.string}")
|
|
15
|
+
print(f" • Notebooks: {c.PATH_NOTEBOOKS.string}")
|
|
16
|
+
print(f" • Config: {c.PATH_CONFIG.string}")
|
|
17
|
+
print(f" • Log Level: {logging.getLevelName(c.LOGLEVEL)}")
|
|
18
|
+
print(f" • Debug Mode: {'✅' if c.IS_DEBUGMODE else '❌'}")
|
|
19
|
+
print(f" • Job Config from YAML: {'✅' if c.IS_JOB_CONFIG_FROM_YAML else '❌'}")
|
|
20
20
|
|
|
21
|
-
print("\n⚙️
|
|
22
|
-
print("\n🔄 PIPELINE STEPS:")
|
|
21
|
+
print("\n⚙️ STEPS:")
|
|
23
22
|
|
|
24
|
-
def _print_steps(
|
|
25
|
-
if
|
|
26
|
-
print(f" {icon} {
|
|
27
|
-
for step in
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
print(f"
|
|
23
|
+
def _print_steps(steps: list[r.StepBronzeConf] | list[r.StepSilverConf] | list[r.StepGoldConf], layer, icon):
|
|
24
|
+
if steps:
|
|
25
|
+
print(f" {icon} {layer}:")
|
|
26
|
+
for step in steps:
|
|
27
|
+
print(f" • {step.name}")
|
|
28
|
+
if extended:
|
|
29
|
+
print(f" - 📖 {r.PATHS_RUNTIME.get(step.name)}")
|
|
30
|
+
print(f" - 💾 {r.PATHS_STORAGE.get(step.name)}")
|
|
31
31
|
else:
|
|
32
|
-
print(f" {icon} {
|
|
32
|
+
print(f" {icon} {layer}: No steps")
|
|
33
33
|
|
|
34
34
|
_print_steps(r.BRONZE, "Bronze", "🥉")
|
|
35
35
|
_print_steps(r.SILVER, "Silver", "🥈")
|
|
36
36
|
_print_steps(r.GOLD, "Gold", "🥇")
|
|
37
37
|
|
|
38
38
|
# Storage Configuration Section
|
|
39
|
-
print("\n💾 STORAGE
|
|
40
|
-
print(f"
|
|
41
|
-
print(f"
|
|
39
|
+
print("\n💾 FABRICKS STORAGE:")
|
|
40
|
+
print(f" • Storage URI: {r.FABRICKS_STORAGE.string}")
|
|
41
|
+
print(f" • Storage Credential: {r.FABRICKS_STORAGE_CREDENTIAL or 'Not configured'}")
|
|
42
42
|
|
|
43
43
|
# Unity Catalog Section
|
|
44
44
|
print("\n🏛️ UNITY CATALOG:")
|
|
45
|
-
print(f"
|
|
45
|
+
print(f" • Enabled: {'✅' if r.IS_UNITY_CATALOG else '❌'}")
|
|
46
46
|
if r.IS_UNITY_CATALOG and r.CATALOG:
|
|
47
|
-
print(f"
|
|
47
|
+
print(f" • Catalog: {r.CATALOG}")
|
|
48
48
|
|
|
49
49
|
# Security Section
|
|
50
50
|
print("\n🔐 SECURITY:")
|
|
51
|
-
print(f"
|
|
52
|
-
|
|
51
|
+
print(f" • Secret Scope: {r.SECRET_SCOPE}")
|
|
53
52
|
print("\n🌐 ADDITIONAL SETTINGS:")
|
|
54
|
-
print(f"
|
|
53
|
+
print(f" • Timezone: {r.TIMEZONE}")
|
|
55
54
|
|
|
56
55
|
if extended:
|
|
57
56
|
# Component Paths Section
|
|
@@ -65,16 +64,4 @@ def pprint_runtime(extended: bool = False) -> None:
|
|
|
65
64
|
]
|
|
66
65
|
|
|
67
66
|
for name, path in components:
|
|
68
|
-
print(f"
|
|
69
|
-
|
|
70
|
-
# Storage Paths Section
|
|
71
|
-
print("\n📦 STORAGE PATHS:")
|
|
72
|
-
for name, path in sorted(r.PATHS_STORAGE.items()):
|
|
73
|
-
icon = "🏭" if name == "fabricks" else "📊"
|
|
74
|
-
print(f" {icon} {name}: {path.string}")
|
|
75
|
-
|
|
76
|
-
# Runtime Paths Section
|
|
77
|
-
if r.PATHS_RUNTIME:
|
|
78
|
-
print("\n⚡ RUNTIME PATHS:")
|
|
79
|
-
for name, path in sorted(r.PATHS_RUNTIME.items()):
|
|
80
|
-
print(f" 📂 {name}: {path.string}")
|
|
67
|
+
print(f" • {name}: {path.string}")
|
fabricks/core/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from fabricks.core.jobs import
|
|
1
|
+
from fabricks.core.jobs import get_job, get_jobs
|
|
2
2
|
from fabricks.core.steps import get_step
|
|
3
3
|
|
|
4
|
-
__all__ = ["get_job", "get_jobs", "get_step"
|
|
4
|
+
__all__ = ["get_job", "get_jobs", "get_step"]
|
fabricks/core/dags/base.py
CHANGED
|
@@ -59,10 +59,9 @@ class BaseDags:
|
|
|
59
59
|
d = TABLE_LOG_HANDLER.table.query(q)
|
|
60
60
|
df = SPARK.createDataFrame(d)
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
df = df.withColumn("NotebookId", expr("null"))
|
|
62
|
+
for column in ["Exception", "NotebookId", "Json"]:
|
|
63
|
+
if column not in df.columns:
|
|
64
|
+
df = df.withColumn(column, expr("null"))
|
|
66
65
|
|
|
67
66
|
df = SPARK.sql(
|
|
68
67
|
"""
|
|
@@ -76,7 +75,8 @@ class BaseDags:
|
|
|
76
75
|
`Level` as `level`,
|
|
77
76
|
`Message` as `status`,
|
|
78
77
|
to_timestamp(`Created`, 'dd/MM/yy HH:mm:ss') as `timestamp`,
|
|
79
|
-
from_json(Exception, 'type STRING, message STRING, traceback STRING') as exception
|
|
78
|
+
from_json(Exception, 'type STRING, message STRING, traceback STRING') as exception,
|
|
79
|
+
Json as json
|
|
80
80
|
from
|
|
81
81
|
{df}
|
|
82
82
|
""",
|
fabricks/core/dags/processor.py
CHANGED
|
@@ -2,7 +2,7 @@ import json
|
|
|
2
2
|
import threading
|
|
3
3
|
import time
|
|
4
4
|
from multiprocessing import Process
|
|
5
|
-
from typing import Any, List
|
|
5
|
+
from typing import Any, List
|
|
6
6
|
|
|
7
7
|
from azure.core.exceptions import AzureError
|
|
8
8
|
from databricks.sdk.runtime import dbutils, spark
|
|
@@ -12,14 +12,13 @@ from fabricks.context import PATH_NOTEBOOKS
|
|
|
12
12
|
from fabricks.core.dags.base import BaseDags
|
|
13
13
|
from fabricks.core.dags.log import LOGGER, TABLE_LOG_HANDLER
|
|
14
14
|
from fabricks.core.dags.run import run
|
|
15
|
-
from fabricks.core.jobs.base._types import TStep
|
|
16
15
|
from fabricks.core.steps.get_step import get_step
|
|
17
16
|
from fabricks.utils.azure_queue import AzureQueue
|
|
18
17
|
from fabricks.utils.azure_table import AzureTable
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
class DagProcessor(BaseDags):
|
|
22
|
-
def __init__(self, schedule_id: str, schedule: str, step:
|
|
21
|
+
def __init__(self, schedule_id: str, schedule: str, step: str, notebook: bool = True):
|
|
23
22
|
self.step = get_step(step=step)
|
|
24
23
|
self.schedule = schedule
|
|
25
24
|
|
fabricks/core/extenders.py
CHANGED
|
@@ -12,7 +12,7 @@ def get_extender(name: str) -> Callable:
|
|
|
12
12
|
if not IS_UNITY_CATALOG:
|
|
13
13
|
assert path.exists(), "no valid extender found in {path.string}"
|
|
14
14
|
else:
|
|
15
|
-
DEFAULT_LOGGER.debug(f"could not check if extender exists ({path.string})")
|
|
15
|
+
DEFAULT_LOGGER.debug(f"could not check if extender exists ({path.string})", extra={"label": "fabricks"})
|
|
16
16
|
|
|
17
17
|
load_module_from_path(name, path)
|
|
18
18
|
e = EXTENDERS[name]
|
fabricks/core/job_schema.py
CHANGED
|
@@ -1,31 +1,41 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
1
|
from typing import List
|
|
3
2
|
|
|
4
|
-
from
|
|
5
|
-
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from fabricks.models import JobConf
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class JobWrapper(BaseModel):
|
|
9
|
+
"""Wrapper for JobConf to generate array schema."""
|
|
10
|
+
|
|
11
|
+
job: JobConf
|
|
6
12
|
|
|
7
13
|
|
|
8
14
|
def get_job_schema() -> str:
|
|
9
15
|
import json
|
|
10
16
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
17
|
+
# Generate JSON schema using Pydantic's built-in method
|
|
18
|
+
# Use List[JobWrapper] to create the array schema
|
|
19
|
+
from pydantic import TypeAdapter
|
|
14
20
|
|
|
15
|
-
|
|
16
|
-
|
|
21
|
+
adapter = TypeAdapter(List[JobWrapper])
|
|
22
|
+
sc = adapter.json_schema()
|
|
23
|
+
|
|
24
|
+
# Remove properties that are not defined in YAML
|
|
25
|
+
defs: dict[str, dict] = sc.get("$defs", {})
|
|
17
26
|
removals = [("Job", "job_id"), ("Job", "table")]
|
|
18
27
|
|
|
19
28
|
for key, defi in defs.items():
|
|
20
29
|
for ent, prop in removals:
|
|
21
|
-
if key.startswith(ent) and prop in defi
|
|
22
|
-
req: List[str] = defi
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
30
|
+
if key.startswith(ent) and prop in defi.get("properties", {}):
|
|
31
|
+
req: List[str] = defi.get("required", [])
|
|
32
|
+
if prop in req:
|
|
33
|
+
req.remove(prop) # not defined in yaml
|
|
34
|
+
|
|
35
|
+
jobprops: dict = defi.get("properties", {})
|
|
36
|
+
jobprops.pop(prop, None)
|
|
37
|
+
|
|
38
|
+
return json.dumps(sc, indent=4)
|
|
29
39
|
|
|
30
40
|
|
|
31
41
|
def print_job_schema():
|
fabricks/core/jobs/__init__.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
from fabricks.core.jobs.base import BaseJob
|
|
1
|
+
from fabricks.core.jobs.base import BaseJob
|
|
2
2
|
from fabricks.core.jobs.bronze import Bronze
|
|
3
3
|
from fabricks.core.jobs.get_job import get_job
|
|
4
|
-
from fabricks.core.jobs.get_job_id import get_job_id
|
|
5
4
|
from fabricks.core.jobs.get_jobs import get_jobs
|
|
6
5
|
from fabricks.core.jobs.gold import Gold
|
|
7
6
|
from fabricks.core.jobs.silver import Silver
|
|
@@ -9,13 +8,8 @@ from fabricks.core.jobs.silver import Silver
|
|
|
9
8
|
__all__ = [
|
|
10
9
|
"BaseJob",
|
|
11
10
|
"Bronze",
|
|
12
|
-
"Bronzes",
|
|
13
|
-
"get_job_id",
|
|
14
11
|
"get_job",
|
|
15
12
|
"get_jobs",
|
|
16
13
|
"Gold",
|
|
17
|
-
"Golds",
|
|
18
14
|
"Silver",
|
|
19
|
-
"Silvers",
|
|
20
|
-
"Steps",
|
|
21
15
|
]
|