fabricks 3.0.5.2__py3-none-any.whl → 3.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. fabricks/api/__init__.py +2 -0
  2. fabricks/api/context.py +1 -2
  3. fabricks/api/deploy.py +3 -0
  4. fabricks/api/job_schema.py +2 -2
  5. fabricks/api/masks.py +3 -0
  6. fabricks/api/notebooks/initialize.py +2 -2
  7. fabricks/api/notebooks/process.py +2 -2
  8. fabricks/api/notebooks/run.py +2 -2
  9. fabricks/api/notebooks/schedule.py +75 -0
  10. fabricks/api/notebooks/terminate.py +2 -2
  11. fabricks/api/schedules.py +2 -16
  12. fabricks/cdc/__init__.py +2 -2
  13. fabricks/cdc/base/__init__.py +2 -2
  14. fabricks/cdc/base/_types.py +9 -2
  15. fabricks/cdc/base/configurator.py +86 -41
  16. fabricks/cdc/base/generator.py +44 -35
  17. fabricks/cdc/base/merger.py +16 -14
  18. fabricks/cdc/base/processor.py +232 -144
  19. fabricks/cdc/nocdc.py +8 -7
  20. fabricks/cdc/templates/{query → ctes}/base.sql.jinja +7 -6
  21. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  22. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  23. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  24. fabricks/cdc/templates/{query → ctes}/rectify.sql.jinja +4 -22
  25. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  26. fabricks/cdc/templates/filter.sql.jinja +4 -4
  27. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  28. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  29. fabricks/cdc/templates/merge.sql.jinja +3 -2
  30. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  31. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  32. fabricks/cdc/templates/{query/nocdc.sql.jinja → queries/nocdc/complete.sql.jinja} +1 -1
  33. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +35 -0
  34. fabricks/cdc/templates/{query → queries}/scd1.sql.jinja +2 -28
  35. fabricks/cdc/templates/{query → queries}/scd2.sql.jinja +29 -48
  36. fabricks/cdc/templates/query.sql.jinja +15 -11
  37. fabricks/context/__init__.py +18 -4
  38. fabricks/context/_types.py +2 -0
  39. fabricks/context/config/__init__.py +92 -0
  40. fabricks/context/config/utils.py +53 -0
  41. fabricks/context/log.py +8 -2
  42. fabricks/context/runtime.py +87 -263
  43. fabricks/context/secret.py +1 -1
  44. fabricks/context/spark_session.py +1 -1
  45. fabricks/context/utils.py +76 -0
  46. fabricks/core/dags/generator.py +6 -7
  47. fabricks/core/dags/log.py +2 -15
  48. fabricks/core/dags/processor.py +11 -11
  49. fabricks/core/dags/utils.py +15 -1
  50. fabricks/core/{scripts/job_schema.py → job_schema.py} +4 -0
  51. fabricks/core/jobs/base/_types.py +64 -22
  52. fabricks/core/jobs/base/checker.py +13 -12
  53. fabricks/core/jobs/base/configurator.py +41 -67
  54. fabricks/core/jobs/base/generator.py +55 -24
  55. fabricks/core/jobs/base/invoker.py +54 -30
  56. fabricks/core/jobs/base/processor.py +43 -26
  57. fabricks/core/jobs/bronze.py +45 -38
  58. fabricks/core/jobs/get_jobs.py +2 -2
  59. fabricks/core/jobs/get_schedule.py +10 -0
  60. fabricks/core/jobs/get_schedules.py +32 -0
  61. fabricks/core/jobs/gold.py +61 -48
  62. fabricks/core/jobs/silver.py +39 -40
  63. fabricks/core/masks.py +52 -0
  64. fabricks/core/parsers/base.py +2 -2
  65. fabricks/core/schedules/__init__.py +14 -0
  66. fabricks/core/schedules/diagrams.py +46 -0
  67. fabricks/core/schedules/get_schedule.py +5 -0
  68. fabricks/core/schedules/get_schedules.py +9 -0
  69. fabricks/core/schedules/run.py +3 -0
  70. fabricks/core/schedules/views.py +61 -0
  71. fabricks/core/steps/base.py +110 -72
  72. fabricks/core/udfs.py +12 -23
  73. fabricks/core/views.py +20 -13
  74. fabricks/deploy/__init__.py +97 -0
  75. fabricks/deploy/masks.py +8 -0
  76. fabricks/deploy/notebooks.py +71 -0
  77. fabricks/deploy/schedules.py +8 -0
  78. fabricks/{core/deploy → deploy}/tables.py +16 -13
  79. fabricks/{core/deploy → deploy}/udfs.py +3 -1
  80. fabricks/deploy/utils.py +36 -0
  81. fabricks/{core/deploy → deploy}/views.py +5 -9
  82. fabricks/metastore/database.py +3 -3
  83. fabricks/metastore/dbobject.py +4 -4
  84. fabricks/metastore/table.py +157 -88
  85. fabricks/metastore/view.py +13 -6
  86. fabricks/utils/_types.py +6 -0
  87. fabricks/utils/azure_table.py +4 -3
  88. fabricks/utils/helpers.py +141 -11
  89. fabricks/utils/log.py +29 -18
  90. fabricks/utils/read/_types.py +1 -1
  91. fabricks/utils/schema/get_schema_for_type.py +6 -0
  92. fabricks/utils/write/delta.py +3 -3
  93. {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/METADATA +2 -1
  94. fabricks-3.0.6.dist-info/RECORD +175 -0
  95. fabricks/api/notebooks/add_fabricks.py +0 -13
  96. fabricks/api/notebooks/optimize.py +0 -29
  97. fabricks/api/notebooks/vacuum.py +0 -29
  98. fabricks/cdc/templates/query/context.sql.jinja +0 -101
  99. fabricks/cdc/templates/query/current.sql.jinja +0 -32
  100. fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +0 -21
  101. fabricks/cdc/templates/query/deduplicate_key.sql.jinja +0 -14
  102. fabricks/cdc/templates/query/hash.sql.jinja +0 -1
  103. fabricks/cdc/templates/query/slice.sql.jinja +0 -14
  104. fabricks/config/__init__.py +0 -0
  105. fabricks/config/base.py +0 -8
  106. fabricks/config/fabricks/__init__.py +0 -26
  107. fabricks/config/fabricks/base.py +0 -90
  108. fabricks/config/fabricks/environment.py +0 -9
  109. fabricks/config/fabricks/pyproject.py +0 -47
  110. fabricks/config/jobs/__init__.py +0 -6
  111. fabricks/config/jobs/base.py +0 -101
  112. fabricks/config/jobs/bronze.py +0 -38
  113. fabricks/config/jobs/gold.py +0 -27
  114. fabricks/config/jobs/silver.py +0 -22
  115. fabricks/config/runtime.py +0 -67
  116. fabricks/config/steps/__init__.py +0 -6
  117. fabricks/config/steps/base.py +0 -50
  118. fabricks/config/steps/bronze.py +0 -7
  119. fabricks/config/steps/gold.py +0 -14
  120. fabricks/config/steps/silver.py +0 -15
  121. fabricks/core/deploy/__init__.py +0 -17
  122. fabricks/core/schedules.py +0 -142
  123. fabricks/core/scripts/__init__.py +0 -9
  124. fabricks/core/scripts/armageddon.py +0 -87
  125. fabricks/core/scripts/stats.py +0 -51
  126. fabricks/core/scripts/steps.py +0 -26
  127. fabricks-3.0.5.2.dist-info/RECORD +0 -177
  128. /fabricks/cdc/templates/{filter → filters}/final.sql.jinja +0 -0
  129. /fabricks/cdc/templates/{filter → filters}/latest.sql.jinja +0 -0
  130. /fabricks/cdc/templates/{filter → filters}/update.sql.jinja +0 -0
  131. /fabricks/cdc/templates/{merge → merges}/scd1.sql.jinja +0 -0
  132. /fabricks/cdc/templates/{merge → merges}/scd2.sql.jinja +0 -0
  133. /fabricks/cdc/templates/{query → queries}/__init__.py +0 -0
  134. /fabricks/cdc/templates/{query → queries}/final.sql.jinja +0 -0
  135. /fabricks/core/{utils.py → parsers/utils.py} +0 -0
  136. /fabricks/core/{scripts → schedules}/generate.py +0 -0
  137. /fabricks/core/{scripts → schedules}/process.py +0 -0
  138. /fabricks/core/{scripts → schedules}/terminate.py +0 -0
  139. {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/WHEEL +0 -0

fabricks/context/__init__.py CHANGED
@@ -1,3 +1,12 @@
+from fabricks.context.config import (
+    IS_DEBUGMODE,
+    IS_DEVMODE,
+    IS_JOB_CONFIG_FROM_YAML,
+    LOGLEVEL,
+    PATH_CONFIG,
+    PATH_NOTEBOOKS,
+    PATH_RUNTIME,
+)
 from fabricks.context.runtime import (
     BRONZE,
     CATALOG,
@@ -5,15 +14,12 @@ from fabricks.context.runtime import (
     FABRICKS_STORAGE,
     FABRICKS_STORAGE_CREDENTIAL,
     GOLD,
-    IS_JOB_CONFIG_FROM_YAML,
     IS_TYPE_WIDENING,
     IS_UNITY_CATALOG,
-    LOGLEVEL,
-    PATH_CONFIG,
     PATH_EXTENDERS,
+    PATH_MASKS,
     PATH_PARSERS,
     PATH_REQUIREMENTS,
-    PATH_RUNTIME,
     PATH_SCHEDULES,
     PATH_UDFS,
     PATH_VIEWS,
@@ -22,9 +28,11 @@ from fabricks.context.runtime import (
     SECRET_SCOPE,
     SILVER,
     STEPS,
+    TIMEZONE,
     VARIABLES,
 )
 from fabricks.context.spark_session import DBUTILS, SPARK, build_spark_session, init_spark_session
+from fabricks.context.utils import pprint_runtime
 
 __all__ = [
     "BRONZE",
@@ -36,12 +44,16 @@ __all__ = [
     "FABRICKS_STORAGE",
     "GOLD",
     "init_spark_session",
+    "IS_DEBUGMODE",
+    "IS_DEVMODE",
     "IS_JOB_CONFIG_FROM_YAML",
     "IS_TYPE_WIDENING",
     "IS_UNITY_CATALOG",
     "LOGLEVEL",
     "PATH_CONFIG",
     "PATH_EXTENDERS",
+    "PATH_MASKS",
+    "PATH_NOTEBOOKS",
     "PATH_PARSERS",
     "PATH_REQUIREMENTS",
     "PATH_RUNTIME",
@@ -49,10 +61,12 @@ __all__ = [
     "PATH_UDFS",
     "PATH_VIEWS",
     "PATHS_RUNTIME",
+    "pprint_runtime",
     "PATHS_STORAGE",
     "SECRET_SCOPE",
     "SILVER",
     "SPARK",
     "STEPS",
+    "TIMEZONE",
     "VARIABLES",
 ]

fabricks/context/_types.py CHANGED
@@ -32,6 +32,7 @@ class RuntimeOptions(TypedDict):
     workers: int
     timeouts: RuntimeTimeoutOptions
     retention_days: int
+    timezone: Optional[str]
 
 
 class SparkOptions(TypedDict):
@@ -83,6 +84,7 @@ class TableOptions(TypedDict):
     liquid_clustering: Optional[bool]
     properties: Optional[dict[str, str]]
     retention_days: Optional[int]
+    masks: Optional[dict[str, str]]
 
 
 class Bronze(Step):
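
Note: the new masks entry on TableOptions is a plain str-to-str mapping. A minimal sketch of how a job's table options could carry it, assuming (this diff does not confirm it) that keys are column names and values name masking functions deployed from the new masks path; the names below are hypothetical:

table_options = {
    "liquid_clustering": True,
    "retention_days": 7,
    # hypothetical mapping: column name -> masking function from fabricks/masks
    "masks": {"email": "mask_email"},
}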

fabricks/context/config/__init__.py ADDED
@@ -0,0 +1,92 @@
+import logging
+import os
+from typing import Final
+
+from fabricks.context.config.utils import get_config_from_file
+from fabricks.utils.path import Path
+from fabricks.utils.spark import spark
+
+file_path, file_config = get_config_from_file()
+
+runtime = os.environ.get("FABRICKS_RUNTIME", "none")
+runtime = None if runtime.lower() == "none" else runtime
+if runtime is None:
+    if runtime := file_config.get("runtime"):
+        assert file_path is not None
+        runtime = file_path.joinpath(runtime)
+
+if runtime is None:
+    if file_path is not None:
+        runtime = file_path
+    else:
+        raise ValueError(
+            "could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
+        )
+
+path_runtime = Path(runtime, assume_git=True)
+PATH_RUNTIME: Final[Path] = path_runtime
+
+notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
+notebooks = None if notebooks.lower() == "none" else notebooks
+if notebooks is None:
+    if notebooks := file_config.get("notebooks"):
+        assert file_path is not None
+        notebooks = file_path.joinpath(notebooks)
+
+notebooks = notebooks if notebooks else path_runtime.joinpath("notebooks")
+PATH_NOTEBOOKS: Final[Path] = Path(str(notebooks), assume_git=True)
+
+is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
+if is_job_config_from_yaml is None:
+    assert file_path is not None
+    is_job_config_from_yaml = file_config.get("job_config_from_yaml")
+
+IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")
+
+is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
+if is_debugmode is None:
+    is_debugmode = file_config.get("debugmode")
+
+IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")
+
+is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
+if is_devmode is None:
+    is_devmode = file_config.get("devmode")
+
+IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")
+
+loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
+if loglevel is None:
+    loglevel = file_config.get("loglevel")
+
+loglevel = loglevel.upper() if loglevel else "INFO"
+if loglevel == "DEBUG":
+    _loglevel = logging.DEBUG
+elif loglevel == "INFO":
+    _loglevel = logging.INFO
+elif loglevel == "WARNING":
+    _loglevel = logging.WARNING
+elif loglevel == "ERROR":
+    _loglevel = logging.ERROR
+elif loglevel == "CRITICAL":
+    _loglevel = logging.CRITICAL
+else:
+    raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")
+
+LOGLEVEL = _loglevel
+
+path_config = os.environ.get("FABRICKS_CONFIG")
+if path_config is None:
+    if path_config := file_config.get("config"):
+        assert file_path is not None
+        path_config = file_path.joinpath(path_config)
+else:
+    path_config = PATH_RUNTIME.joinpath(path_config).string if path_config else None
+
+if not path_config:
+    path_config = PATH_RUNTIME.joinpath(
+        "fabricks",
+        f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
+    ).string
+
+PATH_CONFIG: Final[Path] = Path(path_config, assume_git=True)
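
The new module resolves each setting from a FABRICKS_* environment variable first and only then falls back to pyproject.toml/fabricksconfig.json. Since resolution runs at import time, overrides have to be set before the first import; a minimal sketch (the runtime path is hypothetical):

import os

# must run before fabricks.context.config is imported for the first time
os.environ["FABRICKS_RUNTIME"] = "/Workspace/Repos/my-runtime"  # hypothetical checkout
os.environ["FABRICKS_LOGLEVEL"] = "DEBUG"                       # DEBUG, INFO, WARNING, ERROR or CRITICAL
os.environ["FABRICKS_IS_DEBUGMODE"] = "true"                    # parsed with str(...).lower() in ("true", "1", "yes")

from fabricks.context.config import IS_DEBUGMODE, LOGLEVEL, PATH_RUNTIME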

fabricks/context/config/utils.py ADDED
@@ -0,0 +1,53 @@
+def get_config_from_toml():
+    import os
+    import pathlib
+    import sys
+
+    if sys.version_info >= (3, 11):
+        import tomllib
+    else:
+        import tomli as tomllib  # type: ignore
+
+    path = pathlib.Path(os.getcwd())
+    while path is not None and not (path / "pyproject.toml").exists():
+        if path == path.parent:
+            break
+        path = path.parent
+
+    if (path / "pyproject.toml").exists():
+        with open((path / "pyproject.toml"), "rb") as f:
+            config = tomllib.load(f)
+        return path, config.get("tool", {}).get("fabricks", {})
+
+    return None, {}
+
+
+def get_config_from_json():
+    import json
+    import os
+    import pathlib
+
+    path = pathlib.Path(os.getcwd())
+    while path is not None and not (path / "fabricksconfig.json").exists():
+        if path == path.parent:
+            break
+        path = path.parent
+
+    if (path / "fabricksconfig.json").exists():
+        with open((path / "fabricksconfig.json"), "r") as f:
+            config = json.load(f)
+        return path, config
+
+    return None, {}
+
+
+def get_config_from_file():
+    json_path, json_config = get_config_from_json()
+    if json_config:
+        return json_path, json_config
+
+    pyproject_path, pyproject_config = get_config_from_toml()
+    if pyproject_config:
+        return pyproject_path, pyproject_config
+
+    return None, {}
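
For context, get_config_from_file() prefers a fabricksconfig.json discovered by walking up from the current working directory and falls back to the [tool.fabricks] table of a pyproject.toml. A minimal sketch of the fallback, with a hypothetical project layout (the keys shown are the ones read by fabricks/context/config/__init__.py; values are illustrative):

# pyproject.toml at the (hypothetical) repository root:
#
#   [tool.fabricks]
#   runtime = "runtime"              # resolved relative to the pyproject.toml
#   notebooks = "runtime/notebooks"
#   config = "fabricks/conf.dev.yml"
#   loglevel = "INFO"
#   debugmode = false

from fabricks.context.config.utils import get_config_from_file

path, config = get_config_from_file()  # returns (None, {}) when neither file is found
if path is not None:
    print(path, config.get("runtime"), config.get("loglevel"))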

fabricks/context/log.py CHANGED
@@ -4,10 +4,16 @@ from typing import Final, Literal, Optional
 
 import requests
 
-from fabricks.context.runtime import IS_DEBUGMODE, LOGLEVEL, SECRET_SCOPE
+from fabricks.context import IS_DEBUGMODE, LOGLEVEL, SECRET_SCOPE, TIMEZONE
 from fabricks.utils.log import get_logger
 
-logger, _ = get_logger("logs", LOGLEVEL, table=None, debugmode=IS_DEBUGMODE)
+logger, _ = get_logger(
+    "logs",
+    LOGLEVEL,
+    table=None,
+    debugmode=IS_DEBUGMODE,
+    timezone=TIMEZONE,
+)
 logging.getLogger("SQLQueryContextLogger").setLevel(logging.CRITICAL)
 
 DEFAULT_LOGGER: Final[logging.Logger] = logger

fabricks/context/runtime.py CHANGED
@@ -1,293 +1,117 @@
-import logging
-import os
 from typing import Final, List, Optional
 
 import yaml
 
+from fabricks.context.config import path_config, path_runtime
 from fabricks.utils.path import Path
-from fabricks.utils.spark import spark
 
+with open(str(path_config)) as f:
+    data = yaml.safe_load(f)
 
-def get_config_from_toml():
-    import os
-    import pathlib
-    import sys
+conf: dict = [d["conf"] for d in data][0]
+assert conf, "conf mandatory"
+CONF_RUNTIME: Final[dict] = conf
 
-    if sys.version_info >= (3, 11):
-        import tomllib
-    else:
-        import tomli as tomllib  # type: ignore
+BRONZE = CONF_RUNTIME.get("bronze", [{}])
+SILVER = CONF_RUNTIME.get("silver", [{}])
+GOLD = CONF_RUNTIME.get("gold", [{}])
+STEPS = BRONZE + SILVER + GOLD
 
-    path = pathlib.Path(os.getcwd())
-    while path is not None and not (path / "pyproject.toml").exists():
-        if path == path.parent:
-            break
-        path = path.parent
+databases = CONF_RUNTIME.get("databases", [{}])
+credentials = CONF_RUNTIME.get("credentials", {})
+variables = CONF_RUNTIME.get("variables", {})
+VARIABLES: dict = variables
 
-    if (path / "pyproject.toml").exists():
-        with open((path / "pyproject.toml"), "rb") as f:
-            config = tomllib.load(f)
-        return path, config.get("tool", {}).get("fabricks", {})
+conf_options = CONF_RUNTIME.get("options", {})
+assert conf_options, "options mandatory"
 
-    return None, {}
+IS_UNITY_CATALOG: Final[bool] = str(conf_options.get("unity_catalog", "False")).lower() in ("true", "1", "yes")
+CATALOG: Optional[str] = conf_options.get("catalog")
 
+if IS_UNITY_CATALOG and not CATALOG:
+    raise ValueError("catalog mandatory in options when unity_catalog is enabled")
 
-try:
-    pyproject_path, pyproject_config = get_config_from_toml()
+secret_scope = conf_options.get("secret_scope")
+assert secret_scope, "secret_scope mandatory in options"
+SECRET_SCOPE: Final[str] = secret_scope
 
-    runtime = os.environ.get("FABRICKS_RUNTIME", "none")
-    runtime = None if runtime.lower() == "none" else runtime
-    if runtime is None:
-        if runtime := pyproject_config.get("runtime"):
-            assert pyproject_path is not None  # Cannot be null since we got the config from it
-            runtime = pyproject_path.joinpath(runtime)  # Must resolve relative to pyproject.toml
+timezone = conf_options.get("timezone")
+TIMEZONE: Final[str] = timezone
 
-    if runtime is None and pyproject_path is not None:
-        runtime = pyproject_path
-    elif runtime is None:
-        raise ValueError("Must have at least a pyproject.toml or set FABRICKS_RUNTIME")
+IS_TYPE_WIDENING: Final[bool] = str(conf_options.get("type_widening", "True")).lower() in ("true", "1", "yes")
 
-    path_runtime = Path(runtime, assume_git=True)
-    assert path_runtime, "runtime mandatory in cluster config"
-    PATH_RUNTIME: Final[Path] = path_runtime
+path_options = CONF_RUNTIME.get("path_options", {})
+assert path_options, "options mandatory"
 
-    notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
-    notebooks = None if notebooks.lower() == "none" else notebooks
-    if notebooks is None:
-        if notebooks := pyproject_config.get("notebooks"):
-            assert pyproject_path is not None
-            notebooks = pyproject_path.joinpath(notebooks)
+fabricks_uri = path_options.get("storage")
+assert fabricks_uri, "storage mandatory in path options"
+FABRICKS_STORAGE: Final[Path] = Path.from_uri(fabricks_uri, regex=variables)
 
-    notebooks = notebooks if notebooks else path_runtime.joinpath("notebooks")
-    assert notebooks, "notebooks mandatory"
-    PATH_NOTEBOOKS: Final[Path] = Path(str(notebooks), assume_git=True)
+FABRICKS_STORAGE_CREDENTIAL: Final[Optional[str]] = path_options.get("storage_credential")
 
-    is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
-    if is_job_config_from_yaml is None:
-        is_job_config_from_yaml = pyproject_config.get("job_config_from_yaml")
+path_udfs = path_options.get("udfs", "fabricks/udfs")
+assert path_udfs, "path to udfs mandatory"
+PATH_UDFS: Final[Path] = path_runtime.joinpath(path_udfs)
 
-    IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")
+path_parsers = path_options.get("parsers", "fabricks/parsers")
+assert path_parsers, "path to parsers mandatory"
+PATH_PARSERS: Final[Path] = path_runtime.joinpath(path_parsers)
 
-    is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
-    if is_debugmode is None:
-        is_debugmode = pyproject_config.get("debugmode")
+path_extenders = path_options.get("extenders", "fabricks/extenders")
+assert path_extenders, "path to extenders mandatory"
+PATH_EXTENDERS: Final[Path] = path_runtime.joinpath(path_extenders)
 
-    IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")
+path_views = path_options.get("views", "fabricks/views")
+assert path_views, "path to views mandatory"
+PATH_VIEWS: Final[Path] = path_runtime.joinpath(path_views)
 
-    loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
-    if loglevel is None:
-        loglevel = pyproject_config.get("loglevel")
+path_schedules = path_options.get("schedules", "fabricks/schedules")
+assert path_schedules, "path to schedules mandatory"
+PATH_SCHEDULES: Final[Path] = path_runtime.joinpath(path_schedules)
 
-    loglevel = loglevel.upper() if loglevel else "INFO"
-    if loglevel == "DEBUG":
-        _loglevel = logging.DEBUG
-    elif loglevel == "INFO":
-        _loglevel = logging.INFO
-    elif loglevel == "WARNING":
-        _loglevel = logging.WARNING
-    elif loglevel == "ERROR":
-        _loglevel = logging.ERROR
-    elif loglevel == "CRITICAL":
-        _loglevel = logging.CRITICAL
-    else:
-        raise ValueError(f"{loglevel} not allowed. Use DEBUG, INFO, WARNING, ERROR or CRITICAL")
+path_requirements = path_options.get("requirements", "fabricks/requirements")
+assert path_requirements, "path to requirements mandatory"
+PATH_REQUIREMENTS: Final[Path] = path_runtime.joinpath(path_requirements)
 
-    LOGLEVEL = _loglevel
+path_masks = path_options.get("masks", "fabricks/masks")
+assert path_masks, "path to masks mandatory"
+PATH_MASKS: Final[Path] = path_runtime.joinpath(path_masks)
 
-    config_path = os.environ.get("FABRICKS_CONFIG")
-    if config_path is None:
-        if config_path := pyproject_config.get("config"):
-            assert pyproject_path is not None  # Cannot be null since we got the config from it
-            config_path = pyproject_path.joinpath(config_path)
-    else:
-        config_path = PATH_RUNTIME.joinpath(config_path).string if config_path else None
 
-    if not config_path:
-        config_path = PATH_RUNTIME.joinpath(
-            "fabricks",
-            f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
-        ).string
-
-    PATH_CONFIG: Final[Path] = Path(config_path, assume_git=True)
-
-    with open(config_path) as f:
-        data = yaml.safe_load(f)
-
-    conf: dict = [d["conf"] for d in data][0]
-    assert conf, "conf mandatory"
-    CONF_RUNTIME: Final[dict] = conf
-
-    BRONZE = CONF_RUNTIME.get("bronze", [{}])
-    SILVER = CONF_RUNTIME.get("silver", [{}])
-    GOLD = CONF_RUNTIME.get("gold", [{}])
-    STEPS = BRONZE + SILVER + GOLD
-
-    databases = CONF_RUNTIME.get("databases", [{}])
-    credentials = CONF_RUNTIME.get("credentials", {})
-    variables = CONF_RUNTIME.get("variables", {})
-    VARIABLES: dict = variables
-
-    conf_options = CONF_RUNTIME.get("options", {})
-    assert conf_options, "options mandatory"
-
-    IS_UNITY_CATALOG: Final[bool] = str(conf_options.get("unity_catalog", "False")).lower() in ("true", "1", "yes")
-    CATALOG: Optional[str] = conf_options.get("catalog")
-
-    if IS_UNITY_CATALOG and not CATALOG:
-        raise ValueError("catalog mandatory in options when unity_catalog is enabled")
-
-    secret_scope = conf_options.get("secret_scope")
-    assert secret_scope, "secret_scope mandatory in options"
-    SECRET_SCOPE: Final[str] = secret_scope
-
-    IS_TYPE_WIDENING: Final[bool] = str(conf_options.get("type_widening", "True")).lower() in ("true", "1", "yes")
-
-    path_options = CONF_RUNTIME.get("path_options", {})
-    assert path_options, "options mandatory"
-
-    fabricks_uri = path_options.get("storage")
-    assert fabricks_uri, "storage mandatory in path options"
-    FABRICKS_STORAGE: Final[Path] = Path.from_uri(fabricks_uri, regex=variables)
-
-    FABRICKS_STORAGE_CREDENTIAL: Final[Optional[str]] = path_options.get("storage_credential")
-
-    path_udfs = path_options.get("udfs")
-    assert path_udfs, "udfs mandatory in path options"
-    PATH_UDFS: Final[Path] = PATH_RUNTIME.joinpath(path_udfs)
-
-    path_parsers = path_options.get("parsers")
-    assert path_parsers, "parsers mandatory in path options"
-    PATH_PARSERS: Final[Path] = PATH_RUNTIME.joinpath(path_parsers)
-
-    path_extenders = path_options.get("extenders")
-    assert path_extenders, "extenders mandatory in path options"
-    PATH_EXTENDERS: Final[Path] = PATH_RUNTIME.joinpath(path_extenders)
-
-    path_views = path_options.get("views")
-    assert path_views, "views mandatory in path options"
-    PATH_VIEWS: Final[Path] = PATH_RUNTIME.joinpath(path_views)
-
-    path_schedules = path_options.get("schedules")
-    assert path_schedules, "schedules mandatory in path options"
-    PATH_SCHEDULES: Final[Path] = PATH_RUNTIME.joinpath(path_schedules)
-
-    path_requirements = path_options.get("requirements")
-    assert path_requirements, "requirements mandatory in path options"
-    PATH_REQUIREMENTS: Final[Path] = PATH_RUNTIME.joinpath(path_requirements)
-
-    def _get_storage_paths(objects: List[dict]) -> dict:
-        d = {}
-        for o in objects:
-            if o:
-                name = o.get("name")
-                assert name
-                uri = o.get("path_options", {}).get("storage")
-                assert uri
-                d[name] = Path.from_uri(uri, regex=variables)
-        return d
-
-    PATHS_STORAGE: Final[dict[str, Path]] = {
-        "fabricks": FABRICKS_STORAGE,
-        **_get_storage_paths(BRONZE),
-        **_get_storage_paths(SILVER),
-        **_get_storage_paths(GOLD),
-        **_get_storage_paths(databases),
-    }
-
-    def _get_runtime_path(objects: List[dict]) -> dict:
-        d = {}
-        for o in objects:
+def _get_storage_paths(objects: List[dict]) -> dict:
+    d = {}
+    for o in objects:
+        if o:
             name = o.get("name")
             assert name
-            uri = o.get("path_options", {}).get("runtime")
+            uri = o.get("path_options", {}).get("storage")
             assert uri
-            d[name] = PATH_RUNTIME.joinpath(uri)
-        return d
-
-    PATHS_RUNTIME: Final[dict[str, Path]] = {
-        **_get_runtime_path(BRONZE),
-        **_get_runtime_path(SILVER),
-        **_get_runtime_path(GOLD),
-    }
-
-except KeyError as e:
-    raise e
-
-except AssertionError as e:
-    raise e
-
-
-def pprint_runtime():
-    print("=" * 60)
-    print("FABRICKS RUNTIME CONFIGURATION")
-    print("=" * 60)
-
-    # Core Paths Section
-    print("\n📁 CORE PATHS:")
-    print(f" Runtime: {PATH_RUNTIME.string}")
-    print(f" Notebooks: {PATH_NOTEBOOKS.string}")
-    print(f" Config: {PATH_CONFIG.string}")
-
-    # Runtime Settings Section
-    print("\n⚙️ RUNTIME SETTINGS:")
-    print(f" Log Level: {logging.getLevelName(LOGLEVEL)}")
-    print(f" Debug Mode: {'✓' if IS_DEBUGMODE else '✗'}")
-    print(f" Job Config from YAML: {'✓' if IS_JOB_CONFIG_FROM_YAML else '✗'}")
-    print(f" Type Widening: {'✓' if IS_TYPE_WIDENING else '✗'}")
-
-    print("\n🔄 PIPELINE STEPS:")
-
-    def _print_steps(steps_list, layer_name, icon):
-        if steps_list and any(step for step in steps_list if step):
-            print(f" {icon} {layer_name}:")
-            for step in steps_list:
-                if step:
-                    step_name = step.get("name", "Unnamed")
-                    print(f" • {step_name}")
-        else:
-            print(f" {icon} {layer_name}: No steps")
-
-    _print_steps(BRONZE, "Bronze", "🥉")
-    _print_steps(SILVER, "Silver", "🥈")
-    _print_steps(GOLD, "Gold", "🥇")
-
-    # Storage Configuration Section
-    print("\n💾 STORAGE CONFIGURATION:")
-    print(f" Storage URI: {FABRICKS_STORAGE.string}")
-    print(f" Storage Credential: {FABRICKS_STORAGE_CREDENTIAL or 'Not configured'}")
-
-    # Unity Catalog Section
-    print("\n🏛️ UNITY CATALOG:")
-    print(f" Enabled: {'✓' if IS_UNITY_CATALOG else '✗'}")
-    if IS_UNITY_CATALOG and CATALOG:
-        print(f" Catalog: {CATALOG}")
-
-    # Security Section
-    print("\n🔐 SECURITY:")
-    print(f" Secret Scope: {SECRET_SCOPE}")
-
-    # Component Paths Section
-    print("\n🛠️ COMPONENT PATHS:")
-    components = [
-        ("UDFs", PATH_UDFS),
-        ("Parsers", PATH_PARSERS),
-        ("Extenders", PATH_EXTENDERS),
-        ("Views", PATH_VIEWS),
-        ("Schedules", PATH_SCHEDULES),
-        ("Requirements", PATH_REQUIREMENTS),
-    ]
-
-    for name, path in components:
-        print(f" {name}: {path.string}")
-
-    # Storage Paths Section
-    print("\n📦 STORAGE PATHS:")
-    for name, path in sorted(PATHS_STORAGE.items()):
-        icon = "🏭" if name == "fabricks" else "📊"
-        print(f" {icon} {name}: {path.string}")
-
-    # Runtime Paths Section
-    if PATHS_RUNTIME:
-        print("\n⚡ RUNTIME PATHS:")
-        for name, path in sorted(PATHS_RUNTIME.items()):
-            print(f" 📂 {name}: {path.string}")
+            d[name] = Path.from_uri(uri, regex=variables)
+    return d
+
+
+PATHS_STORAGE: Final[dict[str, Path]] = {
+    "fabricks": FABRICKS_STORAGE,
+    **_get_storage_paths(BRONZE),
+    **_get_storage_paths(SILVER),
+    **_get_storage_paths(GOLD),
+    **_get_storage_paths(databases),
+}
+
+
+def _get_runtime_path(objects: List[dict]) -> dict:
+    d = {}
+    for o in objects:
+        name = o.get("name")
+        assert name
+        uri = o.get("path_options", {}).get("runtime")
+        assert uri
+        d[name] = path_runtime.joinpath(uri)
+    return d
+
+
+PATHS_RUNTIME: Final[dict[str, Path]] = {
+    **_get_runtime_path(BRONZE),
+    **_get_runtime_path(SILVER),
+    **_get_runtime_path(GOLD),
+}
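
The slimmed-down runtime.py now only parses the YAML document resolved by fabricks.context.config. A minimal sketch of the expected shape, inferred from the keys the module reads above (step names, secret scope and storage URIs are hypothetical):

import yaml

conf_yaml = """
- conf:
    options:
      secret_scope: my-scope               # mandatory
      unity_catalog: true
      catalog: my_catalog                  # mandatory when unity_catalog is enabled
      timezone: Europe/Brussels            # new in 3.0.6, exported as TIMEZONE
      type_widening: true
    path_options:
      storage: abfss://fabricks@myaccount.dfs.core.windows.net   # mandatory
      udfs: fabricks/udfs                  # optional, defaults applied when omitted
    bronze:
      - name: bronze
        path_options:
          storage: abfss://bronze@myaccount.dfs.core.windows.net
          runtime: bronze
    silver: []
    gold: []
"""

data = yaml.safe_load(conf_yaml)
conf = [d["conf"] for d in data][0]  # same access pattern as the module above
assert conf["options"]["secret_scope"]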

fabricks/context/secret.py CHANGED
@@ -7,7 +7,7 @@ from typing import Optional
 
 from pyspark.sql import SparkSession
 
-from fabricks.context.runtime import IS_UNITY_CATALOG
+from fabricks.context import IS_UNITY_CATALOG
 from fabricks.utils.spark import spark as _spark
 
 

fabricks/context/spark_session.py CHANGED
@@ -3,7 +3,7 @@ from typing import Optional
 
 from pyspark.sql import SparkSession
 from typing_extensions import deprecated
 
-from fabricks.context.runtime import CATALOG, CONF_RUNTIME, IS_UNITY_CATALOG, SECRET_SCOPE
+from fabricks.context import CATALOG, CONF_RUNTIME, IS_UNITY_CATALOG, SECRET_SCOPE
 from fabricks.context.secret import add_secret_to_spark, get_secret_from_secret_scope
 from fabricks.utils.spark import get_dbutils, get_spark