fabricks 3.0.5.2__py3-none-any.whl → 3.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/api/__init__.py +2 -0
- fabricks/api/context.py +1 -2
- fabricks/api/deploy.py +3 -0
- fabricks/api/job_schema.py +2 -2
- fabricks/api/masks.py +3 -0
- fabricks/api/notebooks/initialize.py +2 -2
- fabricks/api/notebooks/process.py +2 -2
- fabricks/api/notebooks/run.py +2 -2
- fabricks/api/notebooks/schedule.py +75 -0
- fabricks/api/notebooks/terminate.py +2 -2
- fabricks/api/schedules.py +2 -16
- fabricks/cdc/__init__.py +2 -2
- fabricks/cdc/base/__init__.py +2 -2
- fabricks/cdc/base/_types.py +9 -2
- fabricks/cdc/base/configurator.py +86 -41
- fabricks/cdc/base/generator.py +44 -35
- fabricks/cdc/base/merger.py +16 -14
- fabricks/cdc/base/processor.py +232 -144
- fabricks/cdc/nocdc.py +8 -7
- fabricks/cdc/templates/{query → ctes}/base.sql.jinja +7 -6
- fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
- fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
- fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
- fabricks/cdc/templates/{query → ctes}/rectify.sql.jinja +4 -22
- fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
- fabricks/cdc/templates/filter.sql.jinja +4 -4
- fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
- fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
- fabricks/cdc/templates/merge.sql.jinja +3 -2
- fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
- fabricks/cdc/templates/queries/context.sql.jinja +186 -0
- fabricks/cdc/templates/{query/nocdc.sql.jinja → queries/nocdc/complete.sql.jinja} +1 -1
- fabricks/cdc/templates/queries/nocdc/update.sql.jinja +35 -0
- fabricks/cdc/templates/{query → queries}/scd1.sql.jinja +2 -28
- fabricks/cdc/templates/{query → queries}/scd2.sql.jinja +29 -48
- fabricks/cdc/templates/query.sql.jinja +15 -11
- fabricks/context/__init__.py +18 -4
- fabricks/context/_types.py +2 -0
- fabricks/context/config/__init__.py +92 -0
- fabricks/context/config/utils.py +53 -0
- fabricks/context/log.py +8 -2
- fabricks/context/runtime.py +87 -263
- fabricks/context/secret.py +1 -1
- fabricks/context/spark_session.py +1 -1
- fabricks/context/utils.py +76 -0
- fabricks/core/dags/generator.py +6 -7
- fabricks/core/dags/log.py +2 -15
- fabricks/core/dags/processor.py +11 -11
- fabricks/core/dags/utils.py +15 -1
- fabricks/core/{scripts/job_schema.py → job_schema.py} +4 -0
- fabricks/core/jobs/base/_types.py +64 -22
- fabricks/core/jobs/base/checker.py +13 -12
- fabricks/core/jobs/base/configurator.py +41 -67
- fabricks/core/jobs/base/generator.py +55 -24
- fabricks/core/jobs/base/invoker.py +54 -30
- fabricks/core/jobs/base/processor.py +43 -26
- fabricks/core/jobs/bronze.py +45 -38
- fabricks/core/jobs/get_jobs.py +2 -2
- fabricks/core/jobs/get_schedule.py +10 -0
- fabricks/core/jobs/get_schedules.py +32 -0
- fabricks/core/jobs/gold.py +61 -48
- fabricks/core/jobs/silver.py +39 -40
- fabricks/core/masks.py +52 -0
- fabricks/core/parsers/base.py +2 -2
- fabricks/core/schedules/__init__.py +14 -0
- fabricks/core/schedules/diagrams.py +46 -0
- fabricks/core/schedules/get_schedule.py +5 -0
- fabricks/core/schedules/get_schedules.py +9 -0
- fabricks/core/schedules/run.py +3 -0
- fabricks/core/schedules/views.py +61 -0
- fabricks/core/steps/base.py +110 -72
- fabricks/core/udfs.py +12 -23
- fabricks/core/views.py +20 -13
- fabricks/deploy/__init__.py +97 -0
- fabricks/deploy/masks.py +8 -0
- fabricks/deploy/notebooks.py +71 -0
- fabricks/deploy/schedules.py +8 -0
- fabricks/{core/deploy → deploy}/tables.py +16 -13
- fabricks/{core/deploy → deploy}/udfs.py +3 -1
- fabricks/deploy/utils.py +36 -0
- fabricks/{core/deploy → deploy}/views.py +5 -9
- fabricks/metastore/database.py +3 -3
- fabricks/metastore/dbobject.py +4 -4
- fabricks/metastore/table.py +157 -88
- fabricks/metastore/view.py +13 -6
- fabricks/utils/_types.py +6 -0
- fabricks/utils/azure_table.py +4 -3
- fabricks/utils/helpers.py +141 -11
- fabricks/utils/log.py +29 -18
- fabricks/utils/read/_types.py +1 -1
- fabricks/utils/schema/get_schema_for_type.py +6 -0
- fabricks/utils/write/delta.py +3 -3
- {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/METADATA +2 -1
- fabricks-3.0.6.dist-info/RECORD +175 -0
- fabricks/api/notebooks/add_fabricks.py +0 -13
- fabricks/api/notebooks/optimize.py +0 -29
- fabricks/api/notebooks/vacuum.py +0 -29
- fabricks/cdc/templates/query/context.sql.jinja +0 -101
- fabricks/cdc/templates/query/current.sql.jinja +0 -32
- fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +0 -21
- fabricks/cdc/templates/query/deduplicate_key.sql.jinja +0 -14
- fabricks/cdc/templates/query/hash.sql.jinja +0 -1
- fabricks/cdc/templates/query/slice.sql.jinja +0 -14
- fabricks/config/__init__.py +0 -0
- fabricks/config/base.py +0 -8
- fabricks/config/fabricks/__init__.py +0 -26
- fabricks/config/fabricks/base.py +0 -90
- fabricks/config/fabricks/environment.py +0 -9
- fabricks/config/fabricks/pyproject.py +0 -47
- fabricks/config/jobs/__init__.py +0 -6
- fabricks/config/jobs/base.py +0 -101
- fabricks/config/jobs/bronze.py +0 -38
- fabricks/config/jobs/gold.py +0 -27
- fabricks/config/jobs/silver.py +0 -22
- fabricks/config/runtime.py +0 -67
- fabricks/config/steps/__init__.py +0 -6
- fabricks/config/steps/base.py +0 -50
- fabricks/config/steps/bronze.py +0 -7
- fabricks/config/steps/gold.py +0 -14
- fabricks/config/steps/silver.py +0 -15
- fabricks/core/deploy/__init__.py +0 -17
- fabricks/core/schedules.py +0 -142
- fabricks/core/scripts/__init__.py +0 -9
- fabricks/core/scripts/armageddon.py +0 -87
- fabricks/core/scripts/stats.py +0 -51
- fabricks/core/scripts/steps.py +0 -26
- fabricks-3.0.5.2.dist-info/RECORD +0 -177
- /fabricks/cdc/templates/{filter → filters}/final.sql.jinja +0 -0
- /fabricks/cdc/templates/{filter → filters}/latest.sql.jinja +0 -0
- /fabricks/cdc/templates/{filter → filters}/update.sql.jinja +0 -0
- /fabricks/cdc/templates/{merge → merges}/scd1.sql.jinja +0 -0
- /fabricks/cdc/templates/{merge → merges}/scd2.sql.jinja +0 -0
- /fabricks/cdc/templates/{query → queries}/__init__.py +0 -0
- /fabricks/cdc/templates/{query → queries}/final.sql.jinja +0 -0
- /fabricks/core/{utils.py → parsers/utils.py} +0 -0
- /fabricks/core/{scripts → schedules}/generate.py +0 -0
- /fabricks/core/{scripts → schedules}/process.py +0 -0
- /fabricks/core/{scripts → schedules}/terminate.py +0 -0
- {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/WHEEL +0 -0
fabricks/context/__init__.py
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
from fabricks.context.config import (
|
|
2
|
+
IS_DEBUGMODE,
|
|
3
|
+
IS_DEVMODE,
|
|
4
|
+
IS_JOB_CONFIG_FROM_YAML,
|
|
5
|
+
LOGLEVEL,
|
|
6
|
+
PATH_CONFIG,
|
|
7
|
+
PATH_NOTEBOOKS,
|
|
8
|
+
PATH_RUNTIME,
|
|
9
|
+
)
|
|
1
10
|
from fabricks.context.runtime import (
|
|
2
11
|
BRONZE,
|
|
3
12
|
CATALOG,
|
|
@@ -5,15 +14,12 @@ from fabricks.context.runtime import (
|
|
|
5
14
|
FABRICKS_STORAGE,
|
|
6
15
|
FABRICKS_STORAGE_CREDENTIAL,
|
|
7
16
|
GOLD,
|
|
8
|
-
IS_JOB_CONFIG_FROM_YAML,
|
|
9
17
|
IS_TYPE_WIDENING,
|
|
10
18
|
IS_UNITY_CATALOG,
|
|
11
|
-
LOGLEVEL,
|
|
12
|
-
PATH_CONFIG,
|
|
13
19
|
PATH_EXTENDERS,
|
|
20
|
+
PATH_MASKS,
|
|
14
21
|
PATH_PARSERS,
|
|
15
22
|
PATH_REQUIREMENTS,
|
|
16
|
-
PATH_RUNTIME,
|
|
17
23
|
PATH_SCHEDULES,
|
|
18
24
|
PATH_UDFS,
|
|
19
25
|
PATH_VIEWS,
|
|
@@ -22,9 +28,11 @@ from fabricks.context.runtime import (
|
|
|
22
28
|
SECRET_SCOPE,
|
|
23
29
|
SILVER,
|
|
24
30
|
STEPS,
|
|
31
|
+
TIMEZONE,
|
|
25
32
|
VARIABLES,
|
|
26
33
|
)
|
|
27
34
|
from fabricks.context.spark_session import DBUTILS, SPARK, build_spark_session, init_spark_session
|
|
35
|
+
from fabricks.context.utils import pprint_runtime
|
|
28
36
|
|
|
29
37
|
__all__ = [
|
|
30
38
|
"BRONZE",
|
|
@@ -36,12 +44,16 @@ __all__ = [
|
|
|
36
44
|
"FABRICKS_STORAGE",
|
|
37
45
|
"GOLD",
|
|
38
46
|
"init_spark_session",
|
|
47
|
+
"IS_DEBUGMODE",
|
|
48
|
+
"IS_DEVMODE",
|
|
39
49
|
"IS_JOB_CONFIG_FROM_YAML",
|
|
40
50
|
"IS_TYPE_WIDENING",
|
|
41
51
|
"IS_UNITY_CATALOG",
|
|
42
52
|
"LOGLEVEL",
|
|
43
53
|
"PATH_CONFIG",
|
|
44
54
|
"PATH_EXTENDERS",
|
|
55
|
+
"PATH_MASKS",
|
|
56
|
+
"PATH_NOTEBOOKS",
|
|
45
57
|
"PATH_PARSERS",
|
|
46
58
|
"PATH_REQUIREMENTS",
|
|
47
59
|
"PATH_RUNTIME",
|
|
@@ -49,10 +61,12 @@ __all__ = [
|
|
|
49
61
|
"PATH_UDFS",
|
|
50
62
|
"PATH_VIEWS",
|
|
51
63
|
"PATHS_RUNTIME",
|
|
64
|
+
"pprint_runtime",
|
|
52
65
|
"PATHS_STORAGE",
|
|
53
66
|
"SECRET_SCOPE",
|
|
54
67
|
"SILVER",
|
|
55
68
|
"SPARK",
|
|
56
69
|
"STEPS",
|
|
70
|
+
"TIMEZONE",
|
|
57
71
|
"VARIABLES",
|
|
58
72
|
]
|
fabricks/context/_types.py
CHANGED
|
@@ -32,6 +32,7 @@ class RuntimeOptions(TypedDict):
|
|
|
32
32
|
workers: int
|
|
33
33
|
timeouts: RuntimeTimeoutOptions
|
|
34
34
|
retention_days: int
|
|
35
|
+
timezone: Optional[str]
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
class SparkOptions(TypedDict):
|
|
@@ -83,6 +84,7 @@ class TableOptions(TypedDict):
|
|
|
83
84
|
liquid_clustering: Optional[bool]
|
|
84
85
|
properties: Optional[dict[str, str]]
|
|
85
86
|
retention_days: Optional[int]
|
|
87
|
+
masks: Optional[dict[str, str]]
|
|
86
88
|
|
|
87
89
|
|
|
88
90
|
class Bronze(Step):
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Final
|
|
4
|
+
|
|
5
|
+
from fabricks.context.config.utils import get_config_from_file
|
|
6
|
+
from fabricks.utils.path import Path
|
|
7
|
+
from fabricks.utils.spark import spark
|
|
8
|
+
|
|
9
|
+
file_path, file_config = get_config_from_file()
|
|
10
|
+
|
|
11
|
+
runtime = os.environ.get("FABRICKS_RUNTIME", "none")
|
|
12
|
+
runtime = None if runtime.lower() == "none" else runtime
|
|
13
|
+
if runtime is None:
|
|
14
|
+
if runtime := file_config.get("runtime"):
|
|
15
|
+
assert file_path is not None
|
|
16
|
+
runtime = file_path.joinpath(runtime)
|
|
17
|
+
|
|
18
|
+
if runtime is None:
|
|
19
|
+
if file_path is not None:
|
|
20
|
+
runtime = file_path
|
|
21
|
+
else:
|
|
22
|
+
raise ValueError(
|
|
23
|
+
"could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
path_runtime = Path(runtime, assume_git=True)
|
|
27
|
+
PATH_RUNTIME: Final[Path] = path_runtime
|
|
28
|
+
|
|
29
|
+
notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
|
|
30
|
+
notebooks = None if notebooks.lower() == "none" else notebooks
|
|
31
|
+
if notebooks is None:
|
|
32
|
+
if notebooks := file_config.get("notebooks"):
|
|
33
|
+
assert file_path is not None
|
|
34
|
+
notebooks = file_path.joinpath(notebooks)
|
|
35
|
+
|
|
36
|
+
notebooks = notebooks if notebooks else path_runtime.joinpath("notebooks")
|
|
37
|
+
PATH_NOTEBOOKS: Final[Path] = Path(str(notebooks), assume_git=True)
|
|
38
|
+
|
|
39
|
+
is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
|
|
40
|
+
if is_job_config_from_yaml is None:
|
|
41
|
+
assert file_path is not None
|
|
42
|
+
is_job_config_from_yaml = file_config.get("job_config_from_yaml")
|
|
43
|
+
|
|
44
|
+
IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")
|
|
45
|
+
|
|
46
|
+
is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
|
|
47
|
+
if is_debugmode is None:
|
|
48
|
+
is_debugmode = file_config.get("debugmode")
|
|
49
|
+
|
|
50
|
+
IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")
|
|
51
|
+
|
|
52
|
+
is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
|
|
53
|
+
if is_devmode is None:
|
|
54
|
+
is_devmode = file_config.get("devmode")
|
|
55
|
+
|
|
56
|
+
IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")
|
|
57
|
+
|
|
58
|
+
loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
|
|
59
|
+
if loglevel is None:
|
|
60
|
+
loglevel = file_config.get("loglevel")
|
|
61
|
+
|
|
62
|
+
loglevel = loglevel.upper() if loglevel else "INFO"
|
|
63
|
+
if loglevel == "DEBUG":
|
|
64
|
+
_loglevel = logging.DEBUG
|
|
65
|
+
elif loglevel == "INFO":
|
|
66
|
+
_loglevel = logging.INFO
|
|
67
|
+
elif loglevel == "WARNING":
|
|
68
|
+
_loglevel = logging.WARNING
|
|
69
|
+
elif loglevel == "ERROR":
|
|
70
|
+
_loglevel = logging.ERROR
|
|
71
|
+
elif loglevel == "CRITICAL":
|
|
72
|
+
_loglevel = logging.CRITICAL
|
|
73
|
+
else:
|
|
74
|
+
raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")
|
|
75
|
+
|
|
76
|
+
LOGLEVEL = _loglevel
|
|
77
|
+
|
|
78
|
+
path_config = os.environ.get("FABRICKS_CONFIG")
|
|
79
|
+
if path_config is None:
|
|
80
|
+
if path_config := file_config.get("config"):
|
|
81
|
+
assert file_path is not None
|
|
82
|
+
path_config = file_path.joinpath(path_config)
|
|
83
|
+
else:
|
|
84
|
+
path_config = PATH_RUNTIME.joinpath(path_config).string if path_config else None
|
|
85
|
+
|
|
86
|
+
if not path_config:
|
|
87
|
+
path_config = PATH_RUNTIME.joinpath(
|
|
88
|
+
"fabricks",
|
|
89
|
+
f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
|
|
90
|
+
).string
|
|
91
|
+
|
|
92
|
+
PATH_CONFIG: Final[Path] = Path(path_config, assume_git=True)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
def get_config_from_toml():
|
|
2
|
+
import os
|
|
3
|
+
import pathlib
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
if sys.version_info >= (3, 11):
|
|
7
|
+
import tomllib
|
|
8
|
+
else:
|
|
9
|
+
import tomli as tomllib # type: ignore
|
|
10
|
+
|
|
11
|
+
path = pathlib.Path(os.getcwd())
|
|
12
|
+
while path is not None and not (path / "pyproject.toml").exists():
|
|
13
|
+
if path == path.parent:
|
|
14
|
+
break
|
|
15
|
+
path = path.parent
|
|
16
|
+
|
|
17
|
+
if (path / "pyproject.toml").exists():
|
|
18
|
+
with open((path / "pyproject.toml"), "rb") as f:
|
|
19
|
+
config = tomllib.load(f)
|
|
20
|
+
return path, config.get("tool", {}).get("fabricks", {})
|
|
21
|
+
|
|
22
|
+
return None, {}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_config_from_json():
|
|
26
|
+
import json
|
|
27
|
+
import os
|
|
28
|
+
import pathlib
|
|
29
|
+
|
|
30
|
+
path = pathlib.Path(os.getcwd())
|
|
31
|
+
while path is not None and not (path / "fabricksconfig.json").exists():
|
|
32
|
+
if path == path.parent:
|
|
33
|
+
break
|
|
34
|
+
path = path.parent
|
|
35
|
+
|
|
36
|
+
if (path / "fabricksconfig.json").exists():
|
|
37
|
+
with open((path / "fabricksconfig.json"), "r") as f:
|
|
38
|
+
config = json.load(f)
|
|
39
|
+
return path, config
|
|
40
|
+
|
|
41
|
+
return None, {}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_config_from_file():
|
|
45
|
+
json_path, json_config = get_config_from_json()
|
|
46
|
+
if json_config:
|
|
47
|
+
return json_path, json_config
|
|
48
|
+
|
|
49
|
+
pyproject_path, pyproject_config = get_config_from_toml()
|
|
50
|
+
if pyproject_config:
|
|
51
|
+
return pyproject_path, pyproject_config
|
|
52
|
+
|
|
53
|
+
return None, {}
|
fabricks/context/log.py
CHANGED
|
@@ -4,10 +4,16 @@ from typing import Final, Literal, Optional
|
|
|
4
4
|
|
|
5
5
|
import requests
|
|
6
6
|
|
|
7
|
-
from fabricks.context
|
|
7
|
+
from fabricks.context import IS_DEBUGMODE, LOGLEVEL, SECRET_SCOPE, TIMEZONE
|
|
8
8
|
from fabricks.utils.log import get_logger
|
|
9
9
|
|
|
10
|
-
logger, _ = get_logger(
|
|
10
|
+
logger, _ = get_logger(
|
|
11
|
+
"logs",
|
|
12
|
+
LOGLEVEL,
|
|
13
|
+
table=None,
|
|
14
|
+
debugmode=IS_DEBUGMODE,
|
|
15
|
+
timezone=TIMEZONE,
|
|
16
|
+
)
|
|
11
17
|
logging.getLogger("SQLQueryContextLogger").setLevel(logging.CRITICAL)
|
|
12
18
|
|
|
13
19
|
DEFAULT_LOGGER: Final[logging.Logger] = logger
|
fabricks/context/runtime.py
CHANGED
|
@@ -1,293 +1,117 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import os
|
|
3
1
|
from typing import Final, List, Optional
|
|
4
2
|
|
|
5
3
|
import yaml
|
|
6
4
|
|
|
5
|
+
from fabricks.context.config import path_config, path_runtime
|
|
7
6
|
from fabricks.utils.path import Path
|
|
8
|
-
from fabricks.utils.spark import spark
|
|
9
7
|
|
|
8
|
+
with open(str(path_config)) as f:
|
|
9
|
+
data = yaml.safe_load(f)
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
import sys
|
|
11
|
+
conf: dict = [d["conf"] for d in data][0]
|
|
12
|
+
assert conf, "conf mandatory"
|
|
13
|
+
CONF_RUNTIME: Final[dict] = conf
|
|
15
14
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
BRONZE = CONF_RUNTIME.get("bronze", [{}])
|
|
16
|
+
SILVER = CONF_RUNTIME.get("silver", [{}])
|
|
17
|
+
GOLD = CONF_RUNTIME.get("gold", [{}])
|
|
18
|
+
STEPS = BRONZE + SILVER + GOLD
|
|
20
19
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
path = path.parent
|
|
20
|
+
databases = CONF_RUNTIME.get("databases", [{}])
|
|
21
|
+
credentials = CONF_RUNTIME.get("credentials", {})
|
|
22
|
+
variables = CONF_RUNTIME.get("variables", {})
|
|
23
|
+
VARIABLES: dict = variables
|
|
26
24
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
config = tomllib.load(f)
|
|
30
|
-
return path, config.get("tool", {}).get("fabricks", {})
|
|
25
|
+
conf_options = CONF_RUNTIME.get("options", {})
|
|
26
|
+
assert conf_options, "options mandatory"
|
|
31
27
|
|
|
32
|
-
|
|
28
|
+
IS_UNITY_CATALOG: Final[bool] = str(conf_options.get("unity_catalog", "False")).lower() in ("true", "1", "yes")
|
|
29
|
+
CATALOG: Optional[str] = conf_options.get("catalog")
|
|
33
30
|
|
|
31
|
+
if IS_UNITY_CATALOG and not CATALOG:
|
|
32
|
+
raise ValueError("catalog mandatory in options when unity_catalog is enabled")
|
|
34
33
|
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
secret_scope = conf_options.get("secret_scope")
|
|
35
|
+
assert secret_scope, "secret_scope mandatory in options"
|
|
36
|
+
SECRET_SCOPE: Final[str] = secret_scope
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if runtime is None:
|
|
41
|
-
if runtime := pyproject_config.get("runtime"):
|
|
42
|
-
assert pyproject_path is not None # Cannot be null since we got the config from it
|
|
43
|
-
runtime = pyproject_path.joinpath(runtime) # Must resolve relative to pyproject.toml
|
|
38
|
+
timezone = conf_options.get("timezone")
|
|
39
|
+
TIMEZONE: Final[str] = timezone
|
|
44
40
|
|
|
45
|
-
|
|
46
|
-
runtime = pyproject_path
|
|
47
|
-
elif runtime is None:
|
|
48
|
-
raise ValueError("Must have at least a pyproject.toml or set FABRICKS_RUNTIME")
|
|
41
|
+
IS_TYPE_WIDENING: Final[bool] = str(conf_options.get("type_widening", "True")).lower() in ("true", "1", "yes")
|
|
49
42
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
PATH_RUNTIME: Final[Path] = path_runtime
|
|
43
|
+
path_options = CONF_RUNTIME.get("path_options", {})
|
|
44
|
+
assert path_options, "options mandatory"
|
|
53
45
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
if notebooks := pyproject_config.get("notebooks"):
|
|
58
|
-
assert pyproject_path is not None
|
|
59
|
-
notebooks = pyproject_path.joinpath(notebooks)
|
|
46
|
+
fabricks_uri = path_options.get("storage")
|
|
47
|
+
assert fabricks_uri, "storage mandatory in path options"
|
|
48
|
+
FABRICKS_STORAGE: Final[Path] = Path.from_uri(fabricks_uri, regex=variables)
|
|
60
49
|
|
|
61
|
-
|
|
62
|
-
assert notebooks, "notebooks mandatory"
|
|
63
|
-
PATH_NOTEBOOKS: Final[Path] = Path(str(notebooks), assume_git=True)
|
|
50
|
+
FABRICKS_STORAGE_CREDENTIAL: Final[Optional[str]] = path_options.get("storage_credential")
|
|
64
51
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
52
|
+
path_udfs = path_options.get("udfs", "fabricks/udfs")
|
|
53
|
+
assert path_udfs, "path to udfs mandatory"
|
|
54
|
+
PATH_UDFS: Final[Path] = path_runtime.joinpath(path_udfs)
|
|
68
55
|
|
|
69
|
-
|
|
56
|
+
path_parsers = path_options.get("parsers", "fabricks/parsers")
|
|
57
|
+
assert path_parsers, "path to parsers mandatory"
|
|
58
|
+
PATH_PARSERS: Final[Path] = path_runtime.joinpath(path_parsers)
|
|
70
59
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
60
|
+
path_extenders = path_options.get("extenders", "fabricks/extenders")
|
|
61
|
+
assert path_extenders, "path to extenders mandatory"
|
|
62
|
+
PATH_EXTENDERS: Final[Path] = path_runtime.joinpath(path_extenders)
|
|
74
63
|
|
|
75
|
-
|
|
64
|
+
path_views = path_options.get("views", "fabricks/views")
|
|
65
|
+
assert path_views, "path to views mandatory"
|
|
66
|
+
PATH_VIEWS: Final[Path] = path_runtime.joinpath(path_views)
|
|
76
67
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
68
|
+
path_schedules = path_options.get("schedules", "fabricks/schedules")
|
|
69
|
+
assert path_schedules, "path to schedules mandatory"
|
|
70
|
+
PATH_SCHEDULES: Final[Path] = path_runtime.joinpath(path_schedules)
|
|
80
71
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
elif loglevel == "INFO":
|
|
85
|
-
_loglevel = logging.INFO
|
|
86
|
-
elif loglevel == "WARNING":
|
|
87
|
-
_loglevel = logging.WARNING
|
|
88
|
-
elif loglevel == "ERROR":
|
|
89
|
-
_loglevel = logging.ERROR
|
|
90
|
-
elif loglevel == "CRITICAL":
|
|
91
|
-
_loglevel = logging.CRITICAL
|
|
92
|
-
else:
|
|
93
|
-
raise ValueError(f"{loglevel} not allowed. Use DEBUG, INFO, WARNING, ERROR or CRITICAL")
|
|
72
|
+
path_requirements = path_options.get("requirements", "fabricks/requirements")
|
|
73
|
+
assert path_requirements, "path to requirements mandatory"
|
|
74
|
+
PATH_REQUIREMENTS: Final[Path] = path_runtime.joinpath(path_requirements)
|
|
94
75
|
|
|
95
|
-
|
|
76
|
+
path_masks = path_options.get("masks", "fabricks/masks")
|
|
77
|
+
assert path_masks, "path to masks mandatory"
|
|
78
|
+
PATH_MASKS: Final[Path] = path_runtime.joinpath(path_masks)
|
|
96
79
|
|
|
97
|
-
config_path = os.environ.get("FABRICKS_CONFIG")
|
|
98
|
-
if config_path is None:
|
|
99
|
-
if config_path := pyproject_config.get("config"):
|
|
100
|
-
assert pyproject_path is not None # Cannot be null since we got the config from it
|
|
101
|
-
config_path = pyproject_path.joinpath(config_path)
|
|
102
|
-
else:
|
|
103
|
-
config_path = PATH_RUNTIME.joinpath(config_path).string if config_path else None
|
|
104
80
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
).string
|
|
110
|
-
|
|
111
|
-
PATH_CONFIG: Final[Path] = Path(config_path, assume_git=True)
|
|
112
|
-
|
|
113
|
-
with open(config_path) as f:
|
|
114
|
-
data = yaml.safe_load(f)
|
|
115
|
-
|
|
116
|
-
conf: dict = [d["conf"] for d in data][0]
|
|
117
|
-
assert conf, "conf mandatory"
|
|
118
|
-
CONF_RUNTIME: Final[dict] = conf
|
|
119
|
-
|
|
120
|
-
BRONZE = CONF_RUNTIME.get("bronze", [{}])
|
|
121
|
-
SILVER = CONF_RUNTIME.get("silver", [{}])
|
|
122
|
-
GOLD = CONF_RUNTIME.get("gold", [{}])
|
|
123
|
-
STEPS = BRONZE + SILVER + GOLD
|
|
124
|
-
|
|
125
|
-
databases = CONF_RUNTIME.get("databases", [{}])
|
|
126
|
-
credentials = CONF_RUNTIME.get("credentials", {})
|
|
127
|
-
variables = CONF_RUNTIME.get("variables", {})
|
|
128
|
-
VARIABLES: dict = variables
|
|
129
|
-
|
|
130
|
-
conf_options = CONF_RUNTIME.get("options", {})
|
|
131
|
-
assert conf_options, "options mandatory"
|
|
132
|
-
|
|
133
|
-
IS_UNITY_CATALOG: Final[bool] = str(conf_options.get("unity_catalog", "False")).lower() in ("true", "1", "yes")
|
|
134
|
-
CATALOG: Optional[str] = conf_options.get("catalog")
|
|
135
|
-
|
|
136
|
-
if IS_UNITY_CATALOG and not CATALOG:
|
|
137
|
-
raise ValueError("catalog mandatory in options when unity_catalog is enabled")
|
|
138
|
-
|
|
139
|
-
secret_scope = conf_options.get("secret_scope")
|
|
140
|
-
assert secret_scope, "secret_scope mandatory in options"
|
|
141
|
-
SECRET_SCOPE: Final[str] = secret_scope
|
|
142
|
-
|
|
143
|
-
IS_TYPE_WIDENING: Final[bool] = str(conf_options.get("type_widening", "True")).lower() in ("true", "1", "yes")
|
|
144
|
-
|
|
145
|
-
path_options = CONF_RUNTIME.get("path_options", {})
|
|
146
|
-
assert path_options, "options mandatory"
|
|
147
|
-
|
|
148
|
-
fabricks_uri = path_options.get("storage")
|
|
149
|
-
assert fabricks_uri, "storage mandatory in path options"
|
|
150
|
-
FABRICKS_STORAGE: Final[Path] = Path.from_uri(fabricks_uri, regex=variables)
|
|
151
|
-
|
|
152
|
-
FABRICKS_STORAGE_CREDENTIAL: Final[Optional[str]] = path_options.get("storage_credential")
|
|
153
|
-
|
|
154
|
-
path_udfs = path_options.get("udfs")
|
|
155
|
-
assert path_udfs, "udfs mandatory in path options"
|
|
156
|
-
PATH_UDFS: Final[Path] = PATH_RUNTIME.joinpath(path_udfs)
|
|
157
|
-
|
|
158
|
-
path_parsers = path_options.get("parsers")
|
|
159
|
-
assert path_parsers, "parsers mandatory in path options"
|
|
160
|
-
PATH_PARSERS: Final[Path] = PATH_RUNTIME.joinpath(path_parsers)
|
|
161
|
-
|
|
162
|
-
path_extenders = path_options.get("extenders")
|
|
163
|
-
assert path_extenders, "extenders mandatory in path options"
|
|
164
|
-
PATH_EXTENDERS: Final[Path] = PATH_RUNTIME.joinpath(path_extenders)
|
|
165
|
-
|
|
166
|
-
path_views = path_options.get("views")
|
|
167
|
-
assert path_views, "views mandatory in path options"
|
|
168
|
-
PATH_VIEWS: Final[Path] = PATH_RUNTIME.joinpath(path_views)
|
|
169
|
-
|
|
170
|
-
path_schedules = path_options.get("schedules")
|
|
171
|
-
assert path_schedules, "schedules mandatory in path options"
|
|
172
|
-
PATH_SCHEDULES: Final[Path] = PATH_RUNTIME.joinpath(path_schedules)
|
|
173
|
-
|
|
174
|
-
path_requirements = path_options.get("requirements")
|
|
175
|
-
assert path_requirements, "requirements mandatory in path options"
|
|
176
|
-
PATH_REQUIREMENTS: Final[Path] = PATH_RUNTIME.joinpath(path_requirements)
|
|
177
|
-
|
|
178
|
-
def _get_storage_paths(objects: List[dict]) -> dict:
|
|
179
|
-
d = {}
|
|
180
|
-
for o in objects:
|
|
181
|
-
if o:
|
|
182
|
-
name = o.get("name")
|
|
183
|
-
assert name
|
|
184
|
-
uri = o.get("path_options", {}).get("storage")
|
|
185
|
-
assert uri
|
|
186
|
-
d[name] = Path.from_uri(uri, regex=variables)
|
|
187
|
-
return d
|
|
188
|
-
|
|
189
|
-
PATHS_STORAGE: Final[dict[str, Path]] = {
|
|
190
|
-
"fabricks": FABRICKS_STORAGE,
|
|
191
|
-
**_get_storage_paths(BRONZE),
|
|
192
|
-
**_get_storage_paths(SILVER),
|
|
193
|
-
**_get_storage_paths(GOLD),
|
|
194
|
-
**_get_storage_paths(databases),
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
def _get_runtime_path(objects: List[dict]) -> dict:
|
|
198
|
-
d = {}
|
|
199
|
-
for o in objects:
|
|
81
|
+
def _get_storage_paths(objects: List[dict]) -> dict:
|
|
82
|
+
d = {}
|
|
83
|
+
for o in objects:
|
|
84
|
+
if o:
|
|
200
85
|
name = o.get("name")
|
|
201
86
|
assert name
|
|
202
|
-
uri = o.get("path_options", {}).get("
|
|
87
|
+
uri = o.get("path_options", {}).get("storage")
|
|
203
88
|
assert uri
|
|
204
|
-
d[name] =
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
print(f" Log Level: {logging.getLevelName(LOGLEVEL)}")
|
|
234
|
-
print(f" Debug Mode: {'✓' if IS_DEBUGMODE else '✗'}")
|
|
235
|
-
print(f" Job Config from YAML: {'✓' if IS_JOB_CONFIG_FROM_YAML else '✗'}")
|
|
236
|
-
print(f" Type Widening: {'✓' if IS_TYPE_WIDENING else '✗'}")
|
|
237
|
-
|
|
238
|
-
print("\n🔄 PIPELINE STEPS:")
|
|
239
|
-
|
|
240
|
-
def _print_steps(steps_list, layer_name, icon):
|
|
241
|
-
if steps_list and any(step for step in steps_list if step):
|
|
242
|
-
print(f" {icon} {layer_name}:")
|
|
243
|
-
for step in steps_list:
|
|
244
|
-
if step:
|
|
245
|
-
step_name = step.get("name", "Unnamed")
|
|
246
|
-
print(f" • {step_name}")
|
|
247
|
-
else:
|
|
248
|
-
print(f" {icon} {layer_name}: No steps")
|
|
249
|
-
|
|
250
|
-
_print_steps(BRONZE, "Bronze", "🥉")
|
|
251
|
-
_print_steps(SILVER, "Silver", "🥈")
|
|
252
|
-
_print_steps(GOLD, "Gold", "🥇")
|
|
253
|
-
|
|
254
|
-
# Storage Configuration Section
|
|
255
|
-
print("\n💾 STORAGE CONFIGURATION:")
|
|
256
|
-
print(f" Storage URI: {FABRICKS_STORAGE.string}")
|
|
257
|
-
print(f" Storage Credential: {FABRICKS_STORAGE_CREDENTIAL or 'Not configured'}")
|
|
258
|
-
|
|
259
|
-
# Unity Catalog Section
|
|
260
|
-
print("\n🏛️ UNITY CATALOG:")
|
|
261
|
-
print(f" Enabled: {'✓' if IS_UNITY_CATALOG else '✗'}")
|
|
262
|
-
if IS_UNITY_CATALOG and CATALOG:
|
|
263
|
-
print(f" Catalog: {CATALOG}")
|
|
264
|
-
|
|
265
|
-
# Security Section
|
|
266
|
-
print("\n🔐 SECURITY:")
|
|
267
|
-
print(f" Secret Scope: {SECRET_SCOPE}")
|
|
268
|
-
|
|
269
|
-
# Component Paths Section
|
|
270
|
-
print("\n🛠️ COMPONENT PATHS:")
|
|
271
|
-
components = [
|
|
272
|
-
("UDFs", PATH_UDFS),
|
|
273
|
-
("Parsers", PATH_PARSERS),
|
|
274
|
-
("Extenders", PATH_EXTENDERS),
|
|
275
|
-
("Views", PATH_VIEWS),
|
|
276
|
-
("Schedules", PATH_SCHEDULES),
|
|
277
|
-
("Requirements", PATH_REQUIREMENTS),
|
|
278
|
-
]
|
|
279
|
-
|
|
280
|
-
for name, path in components:
|
|
281
|
-
print(f" {name}: {path.string}")
|
|
282
|
-
|
|
283
|
-
# Storage Paths Section
|
|
284
|
-
print("\n📦 STORAGE PATHS:")
|
|
285
|
-
for name, path in sorted(PATHS_STORAGE.items()):
|
|
286
|
-
icon = "🏭" if name == "fabricks" else "📊"
|
|
287
|
-
print(f" {icon} {name}: {path.string}")
|
|
288
|
-
|
|
289
|
-
# Runtime Paths Section
|
|
290
|
-
if PATHS_RUNTIME:
|
|
291
|
-
print("\n⚡ RUNTIME PATHS:")
|
|
292
|
-
for name, path in sorted(PATHS_RUNTIME.items()):
|
|
293
|
-
print(f" 📂 {name}: {path.string}")
|
|
89
|
+
d[name] = Path.from_uri(uri, regex=variables)
|
|
90
|
+
return d
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
PATHS_STORAGE: Final[dict[str, Path]] = {
|
|
94
|
+
"fabricks": FABRICKS_STORAGE,
|
|
95
|
+
**_get_storage_paths(BRONZE),
|
|
96
|
+
**_get_storage_paths(SILVER),
|
|
97
|
+
**_get_storage_paths(GOLD),
|
|
98
|
+
**_get_storage_paths(databases),
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _get_runtime_path(objects: List[dict]) -> dict:
|
|
103
|
+
d = {}
|
|
104
|
+
for o in objects:
|
|
105
|
+
name = o.get("name")
|
|
106
|
+
assert name
|
|
107
|
+
uri = o.get("path_options", {}).get("runtime")
|
|
108
|
+
assert uri
|
|
109
|
+
d[name] = path_runtime.joinpath(uri)
|
|
110
|
+
return d
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
PATHS_RUNTIME: Final[dict[str, Path]] = {
|
|
114
|
+
**_get_runtime_path(BRONZE),
|
|
115
|
+
**_get_runtime_path(SILVER),
|
|
116
|
+
**_get_runtime_path(GOLD),
|
|
117
|
+
}
|
fabricks/context/secret.py
CHANGED
|
@@ -3,7 +3,7 @@ from typing import Optional
|
|
|
3
3
|
from pyspark.sql import SparkSession
|
|
4
4
|
from typing_extensions import deprecated
|
|
5
5
|
|
|
6
|
-
from fabricks.context
|
|
6
|
+
from fabricks.context import CATALOG, CONF_RUNTIME, IS_UNITY_CATALOG, SECRET_SCOPE
|
|
7
7
|
from fabricks.context.secret import add_secret_to_spark, get_secret_from_secret_scope
|
|
8
8
|
from fabricks.utils.spark import get_dbutils, get_spark
|
|
9
9
|
|