fabricks 3.0.11__py3-none-any.whl → 3.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/cdc/base/merger.py +1 -0
- fabricks/cdc/base/processor.py +4 -3
- fabricks/context/config.py +127 -0
- fabricks/context/helpers.py +58 -0
- fabricks/context/runtime.py +18 -42
- fabricks/core/dags/processor.py +3 -1
- fabricks/core/extenders.py +2 -7
- fabricks/core/jobs/base/generator.py +20 -2
- fabricks/core/parsers/get_parser.py +2 -7
- fabricks/core/steps/base.py +16 -8
- fabricks/deploy/views.py +8 -3
- fabricks/metastore/table.py +39 -1
- fabricks/utils/azure_table.py +23 -22
- fabricks/utils/helpers.py +18 -2
- fabricks/utils/path.py +8 -2
- {fabricks-3.0.11.dist-info → fabricks-3.0.13.dist-info}/METADATA +1 -1
- {fabricks-3.0.11.dist-info → fabricks-3.0.13.dist-info}/RECORD +18 -18
- {fabricks-3.0.11.dist-info → fabricks-3.0.13.dist-info}/WHEEL +1 -1
- fabricks/context/config/__init__.py +0 -92
- fabricks/context/config/utils.py +0 -53
fabricks/cdc/base/merger.py
CHANGED
fabricks/cdc/base/processor.py
CHANGED
|
@@ -390,11 +390,12 @@ class Processor(Generator):
|
|
|
390
390
|
try:
|
|
391
391
|
sql = template.render(**context)
|
|
392
392
|
if fix:
|
|
393
|
-
DEFAULT_LOGGER.debug("fix context", extra={"label": self, "sql": sql})
|
|
394
393
|
sql = self.fix_sql(sql)
|
|
394
|
+
else:
|
|
395
|
+
DEFAULT_LOGGER.debug("print query", extra={"label": self, "sql": sql})
|
|
395
396
|
|
|
396
397
|
except (Exception, TypeError) as e:
|
|
397
|
-
DEFAULT_LOGGER.exception("fail to
|
|
398
|
+
DEFAULT_LOGGER.exception("fail to render sql query", extra={"label": self, "context": context})
|
|
398
399
|
raise e
|
|
399
400
|
|
|
400
401
|
row = self.spark.sql(sql).collect()[0]
|
|
@@ -425,7 +426,7 @@ class Processor(Generator):
|
|
|
425
426
|
|
|
426
427
|
except (Exception, TypeError) as e:
|
|
427
428
|
DEFAULT_LOGGER.debug("context", extra={"label": self, "context": context})
|
|
428
|
-
DEFAULT_LOGGER.exception("fail to
|
|
429
|
+
DEFAULT_LOGGER.exception("fail to render sql query", extra={"label": self, "context": context})
|
|
429
430
|
raise e
|
|
430
431
|
|
|
431
432
|
return sql
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Final
|
|
4
|
+
|
|
5
|
+
from fabricks.context.helpers import get_config_from_file
|
|
6
|
+
from fabricks.utils.log import get_logger
|
|
7
|
+
from fabricks.utils.path import Path
|
|
8
|
+
from fabricks.utils.spark import spark
|
|
9
|
+
|
|
10
|
+
logger, _ = get_logger("logs", level=logging.DEBUG)
|
|
11
|
+
file_path, file_config, origin = get_config_from_file()
|
|
12
|
+
|
|
13
|
+
if file_path:
|
|
14
|
+
logger.debug(f"found {origin} config ({file_path})", extra={"label": "config"})
|
|
15
|
+
|
|
16
|
+
# path to runtime
|
|
17
|
+
runtime = os.environ.get("FABRICKS_RUNTIME", "none")
|
|
18
|
+
runtime = None if runtime.lower() == "none" else runtime
|
|
19
|
+
if runtime is None:
|
|
20
|
+
if runtime := file_config.get("runtime"):
|
|
21
|
+
assert file_path is not None
|
|
22
|
+
runtime = file_path.joinpath(runtime)
|
|
23
|
+
logger.debug(f"resolve runtime from {origin} file", extra={"label": "config"})
|
|
24
|
+
else:
|
|
25
|
+
logger.debug("resolve runtime from env", extra={"label": "config"})
|
|
26
|
+
|
|
27
|
+
if runtime is None:
|
|
28
|
+
if file_path is not None:
|
|
29
|
+
runtime = file_path
|
|
30
|
+
logger.debug(f"resolve runtime from {origin} file", extra={"label": "config"})
|
|
31
|
+
else:
|
|
32
|
+
raise ValueError(
|
|
33
|
+
"could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
path_runtime = Path(runtime, assume_git=True)
|
|
37
|
+
|
|
38
|
+
# path to config
|
|
39
|
+
config = os.environ.get("FABRICKS_CONFIG")
|
|
40
|
+
if config is None:
|
|
41
|
+
if config := file_config.get("config"):
|
|
42
|
+
assert file_path is not None
|
|
43
|
+
config = file_path.joinpath(config)
|
|
44
|
+
logger.debug(f"resolve config from {origin} file", extra={"label": "config"})
|
|
45
|
+
else:
|
|
46
|
+
logger.debug("resolve config from env", extra={"label": "config"})
|
|
47
|
+
|
|
48
|
+
if config is None:
|
|
49
|
+
logger.debug("resolve config from default path", extra={"label": "config"})
|
|
50
|
+
config = path_runtime.joinpath(
|
|
51
|
+
"fabricks",
|
|
52
|
+
f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
|
|
53
|
+
).string
|
|
54
|
+
|
|
55
|
+
path_config = Path(config, assume_git=True)
|
|
56
|
+
|
|
57
|
+
# path to notebooks
|
|
58
|
+
notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
|
|
59
|
+
notebooks = None if notebooks.lower() == "none" else notebooks
|
|
60
|
+
if notebooks is None:
|
|
61
|
+
if notebooks := file_config.get("notebooks"):
|
|
62
|
+
assert file_path is not None
|
|
63
|
+
notebooks = file_path.joinpath(notebooks)
|
|
64
|
+
logger.debug(f"resolve notebooks from {origin} file", extra={"label": "config"})
|
|
65
|
+
else:
|
|
66
|
+
logger.debug("resolve notebooks from env", extra={"label": "config"})
|
|
67
|
+
|
|
68
|
+
if notebooks is None:
|
|
69
|
+
logger.debug("resolve notebooks from default path", extra={"label": "config"})
|
|
70
|
+
notebooks = path_runtime.joinpath("notebooks")
|
|
71
|
+
|
|
72
|
+
path_notebooks = Path(str(notebooks), assume_git=True)
|
|
73
|
+
|
|
74
|
+
# job config from yaml
|
|
75
|
+
is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
|
|
76
|
+
if is_job_config_from_yaml is None:
|
|
77
|
+
if is_job_config_from_yaml := file_config.get("job_config_from_yaml"):
|
|
78
|
+
logger.debug(f"resolve job_config_from_yaml from {origin} file", extra={"label": "config"})
|
|
79
|
+
else:
|
|
80
|
+
logger.debug("resolve job_config_from_yaml from env", extra={"label": "config"})
|
|
81
|
+
|
|
82
|
+
# debug mode
|
|
83
|
+
is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
|
|
84
|
+
if is_debugmode is None:
|
|
85
|
+
if is_debugmode := file_config.get("debugmode"):
|
|
86
|
+
logger.debug(f"resolve debugmode from {origin} file", extra={"label": "config"})
|
|
87
|
+
else:
|
|
88
|
+
logger.debug("resolve debugmode from env", extra={"label": "config"})
|
|
89
|
+
|
|
90
|
+
# dev mode
|
|
91
|
+
is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
|
|
92
|
+
if is_devmode is None:
|
|
93
|
+
if is_devmode := file_config.get("devmode"):
|
|
94
|
+
logger.debug(f"resolve devmode from {origin} file", extra={"label": "config"})
|
|
95
|
+
else:
|
|
96
|
+
logger.debug("resolve devmode from env", extra={"label": "config"})
|
|
97
|
+
|
|
98
|
+
# log level
|
|
99
|
+
loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
|
|
100
|
+
if loglevel is None:
|
|
101
|
+
if loglevel := file_config.get("loglevel"):
|
|
102
|
+
logger.debug(f"resolve loglevel from {origin} file", extra={"label": "config"})
|
|
103
|
+
else:
|
|
104
|
+
logger.debug("resolve loglevel from env", extra={"label": "config"})
|
|
105
|
+
|
|
106
|
+
loglevel = loglevel.upper() if loglevel else "INFO"
|
|
107
|
+
if loglevel == "DEBUG":
|
|
108
|
+
_loglevel = logging.DEBUG
|
|
109
|
+
elif loglevel == "INFO":
|
|
110
|
+
_loglevel = logging.INFO
|
|
111
|
+
elif loglevel == "WARNING":
|
|
112
|
+
_loglevel = logging.WARNING
|
|
113
|
+
elif loglevel == "ERROR":
|
|
114
|
+
_loglevel = logging.ERROR
|
|
115
|
+
elif loglevel == "CRITICAL":
|
|
116
|
+
_loglevel = logging.CRITICAL
|
|
117
|
+
else:
|
|
118
|
+
raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")
|
|
119
|
+
|
|
120
|
+
# Constants
|
|
121
|
+
PATH_CONFIG: Final[Path] = path_config
|
|
122
|
+
PATH_RUNTIME: Final[Path] = path_runtime
|
|
123
|
+
PATH_NOTEBOOKS: Final[Path] = path_notebooks
|
|
124
|
+
IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")
|
|
125
|
+
IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")
|
|
126
|
+
IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")
|
|
127
|
+
LOGLEVEL: Final[int] = _loglevel
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from fabricks.utils.path import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_config_from_file():
|
|
9
|
+
path = pathlib.Path(os.getcwd())
|
|
10
|
+
|
|
11
|
+
while path is not None and (not (path / "pyproject.toml").exists() or (path / "fabricksconfig.json").exists()):
|
|
12
|
+
if path == path.parent:
|
|
13
|
+
break
|
|
14
|
+
path = path.parent
|
|
15
|
+
|
|
16
|
+
if (path / "fabricksconfig.json").exists():
|
|
17
|
+
import json
|
|
18
|
+
|
|
19
|
+
with open((path / "fabricksconfig.json"), "r") as f:
|
|
20
|
+
config = json.load(f)
|
|
21
|
+
return path, config, "json"
|
|
22
|
+
|
|
23
|
+
if (path / "pyproject.toml").exists():
|
|
24
|
+
import sys
|
|
25
|
+
|
|
26
|
+
if sys.version_info >= (3, 11):
|
|
27
|
+
import tomllib
|
|
28
|
+
else:
|
|
29
|
+
import tomli as tomllib # type: ignore
|
|
30
|
+
|
|
31
|
+
with open((path / "pyproject.toml"), "rb") as f:
|
|
32
|
+
config = tomllib.load(f)
|
|
33
|
+
return path, config.get("tool", {}).get("fabricks", {}), "pyproject"
|
|
34
|
+
|
|
35
|
+
return None, {}, None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_storage_paths(objects: List[dict], variables: dict) -> dict:
|
|
39
|
+
d = {}
|
|
40
|
+
for o in objects:
|
|
41
|
+
if o:
|
|
42
|
+
name = o.get("name")
|
|
43
|
+
assert name
|
|
44
|
+
uri = o.get("path_options", {}).get("storage")
|
|
45
|
+
assert uri
|
|
46
|
+
d[name] = Path.from_uri(uri, regex=variables)
|
|
47
|
+
return d
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_runtime_path(objects: List[dict], root: Path) -> dict:
|
|
51
|
+
d = {}
|
|
52
|
+
for o in objects:
|
|
53
|
+
name = o.get("name")
|
|
54
|
+
assert name
|
|
55
|
+
uri = o.get("path_options", {}).get("runtime")
|
|
56
|
+
assert uri
|
|
57
|
+
d[name] = root.joinpath(uri)
|
|
58
|
+
return d
|
fabricks/context/runtime.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
from typing import Final,
|
|
1
|
+
from typing import Final, Optional
|
|
2
2
|
|
|
3
3
|
import yaml
|
|
4
4
|
|
|
5
|
-
from fabricks.context.config import
|
|
5
|
+
from fabricks.context.config import PATH_CONFIG, PATH_RUNTIME
|
|
6
|
+
from fabricks.context.helpers import get_runtime_path, get_storage_paths
|
|
6
7
|
from fabricks.utils.path import Path
|
|
7
8
|
|
|
8
|
-
with open(str(
|
|
9
|
+
with open(str(PATH_CONFIG)) as f:
|
|
9
10
|
data = yaml.safe_load(f)
|
|
10
11
|
|
|
11
12
|
conf: dict = [d["conf"] for d in data][0]
|
|
@@ -51,67 +52,42 @@ FABRICKS_STORAGE_CREDENTIAL: Final[Optional[str]] = path_options.get("storage_cr
|
|
|
51
52
|
|
|
52
53
|
path_udfs = path_options.get("udfs", "fabricks/udfs")
|
|
53
54
|
assert path_udfs, "path to udfs mandatory"
|
|
54
|
-
PATH_UDFS: Final[Path] =
|
|
55
|
+
PATH_UDFS: Final[Path] = PATH_RUNTIME.joinpath(path_udfs)
|
|
55
56
|
|
|
56
57
|
path_parsers = path_options.get("parsers", "fabricks/parsers")
|
|
57
58
|
assert path_parsers, "path to parsers mandatory"
|
|
58
|
-
PATH_PARSERS: Final[Path] =
|
|
59
|
+
PATH_PARSERS: Final[Path] = PATH_RUNTIME.joinpath(path_parsers)
|
|
59
60
|
|
|
60
61
|
path_extenders = path_options.get("extenders", "fabricks/extenders")
|
|
61
62
|
assert path_extenders, "path to extenders mandatory"
|
|
62
|
-
PATH_EXTENDERS: Final[Path] =
|
|
63
|
+
PATH_EXTENDERS: Final[Path] = PATH_RUNTIME.joinpath(path_extenders)
|
|
63
64
|
|
|
64
65
|
path_views = path_options.get("views", "fabricks/views")
|
|
65
66
|
assert path_views, "path to views mandatory"
|
|
66
|
-
PATH_VIEWS: Final[Path] =
|
|
67
|
+
PATH_VIEWS: Final[Path] = PATH_RUNTIME.joinpath(path_views)
|
|
67
68
|
|
|
68
69
|
path_schedules = path_options.get("schedules", "fabricks/schedules")
|
|
69
70
|
assert path_schedules, "path to schedules mandatory"
|
|
70
|
-
PATH_SCHEDULES: Final[Path] =
|
|
71
|
+
PATH_SCHEDULES: Final[Path] = PATH_RUNTIME.joinpath(path_schedules)
|
|
71
72
|
|
|
72
73
|
path_requirements = path_options.get("requirements", "fabricks/requirements")
|
|
73
74
|
assert path_requirements, "path to requirements mandatory"
|
|
74
|
-
PATH_REQUIREMENTS: Final[Path] =
|
|
75
|
+
PATH_REQUIREMENTS: Final[Path] = PATH_RUNTIME.joinpath(path_requirements)
|
|
75
76
|
|
|
76
77
|
path_masks = path_options.get("masks", "fabricks/masks")
|
|
77
78
|
assert path_masks, "path to masks mandatory"
|
|
78
|
-
PATH_MASKS: Final[Path] =
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def _get_storage_paths(objects: List[dict]) -> dict:
|
|
82
|
-
d = {}
|
|
83
|
-
for o in objects:
|
|
84
|
-
if o:
|
|
85
|
-
name = o.get("name")
|
|
86
|
-
assert name
|
|
87
|
-
uri = o.get("path_options", {}).get("storage")
|
|
88
|
-
assert uri
|
|
89
|
-
d[name] = Path.from_uri(uri, regex=variables)
|
|
90
|
-
return d
|
|
91
|
-
|
|
79
|
+
PATH_MASKS: Final[Path] = PATH_RUNTIME.joinpath(path_masks)
|
|
92
80
|
|
|
93
81
|
PATHS_STORAGE: Final[dict[str, Path]] = {
|
|
94
82
|
"fabricks": FABRICKS_STORAGE,
|
|
95
|
-
**
|
|
96
|
-
**
|
|
97
|
-
**
|
|
98
|
-
**
|
|
83
|
+
**get_storage_paths(BRONZE, variables),
|
|
84
|
+
**get_storage_paths(SILVER, variables),
|
|
85
|
+
**get_storage_paths(GOLD, variables),
|
|
86
|
+
**get_storage_paths(databases, variables),
|
|
99
87
|
}
|
|
100
88
|
|
|
101
|
-
|
|
102
|
-
def _get_runtime_path(objects: List[dict]) -> dict:
|
|
103
|
-
d = {}
|
|
104
|
-
for o in objects:
|
|
105
|
-
name = o.get("name")
|
|
106
|
-
assert name
|
|
107
|
-
uri = o.get("path_options", {}).get("runtime")
|
|
108
|
-
assert uri
|
|
109
|
-
d[name] = path_runtime.joinpath(uri)
|
|
110
|
-
return d
|
|
111
|
-
|
|
112
|
-
|
|
113
89
|
PATHS_RUNTIME: Final[dict[str, Path]] = {
|
|
114
|
-
**
|
|
115
|
-
**
|
|
116
|
-
**
|
|
90
|
+
**get_runtime_path(BRONZE, PATH_RUNTIME),
|
|
91
|
+
**get_runtime_path(SILVER, PATH_RUNTIME),
|
|
92
|
+
**get_runtime_path(GOLD, PATH_RUNTIME),
|
|
117
93
|
}
|
fabricks/core/dags/processor.py
CHANGED
|
@@ -10,7 +10,7 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_ex
|
|
|
10
10
|
|
|
11
11
|
from fabricks.context import PATH_NOTEBOOKS
|
|
12
12
|
from fabricks.core.dags.base import BaseDags
|
|
13
|
-
from fabricks.core.dags.log import LOGGER
|
|
13
|
+
from fabricks.core.dags.log import LOGGER, TABLE_LOG_HANDLER
|
|
14
14
|
from fabricks.core.dags.run import run
|
|
15
15
|
from fabricks.core.jobs.base._types import TStep
|
|
16
16
|
from fabricks.core.steps.get_step import get_step
|
|
@@ -148,7 +148,9 @@ class DagProcessor(BaseDags):
|
|
|
148
148
|
finally:
|
|
149
149
|
j["Status"] = "ok"
|
|
150
150
|
self.table.upsert(j)
|
|
151
|
+
|
|
151
152
|
LOGGER.info("end", extra=self.extra(j))
|
|
153
|
+
TABLE_LOG_HANDLER.flush()
|
|
152
154
|
|
|
153
155
|
dependencies = self.table.query(f"PartitionKey eq 'dependencies' and ParentId eq '{j.get('JobId')}'")
|
|
154
156
|
self.table.delete(dependencies)
|
fabricks/core/extenders.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from importlib.util import module_from_spec, spec_from_file_location
|
|
2
1
|
from typing import Callable
|
|
3
2
|
|
|
4
3
|
from fabricks.context import IS_UNITY_CATALOG, PATH_EXTENDERS
|
|
5
4
|
from fabricks.context.log import DEFAULT_LOGGER
|
|
5
|
+
from fabricks.utils.helpers import load_module_from_path
|
|
6
6
|
|
|
7
7
|
EXTENDERS: dict[str, Callable] = {}
|
|
8
8
|
|
|
@@ -14,12 +14,7 @@ def get_extender(name: str) -> Callable:
|
|
|
14
14
|
else:
|
|
15
15
|
DEFAULT_LOGGER.debug(f"could not check if extender exists ({path.string})")
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
assert spec, "no valid extender found in {path.string}"
|
|
19
|
-
assert spec.loader is not None
|
|
20
|
-
|
|
21
|
-
mod = module_from_spec(spec)
|
|
22
|
-
spec.loader.exec_module(mod)
|
|
17
|
+
load_module_from_path(name, path)
|
|
23
18
|
e = EXTENDERS[name]
|
|
24
19
|
|
|
25
20
|
return e
|
|
@@ -336,10 +336,10 @@ class Generator(Configurator):
|
|
|
336
336
|
|
|
337
337
|
comment = self.options.table.get("comment")
|
|
338
338
|
if comment:
|
|
339
|
-
self.table.
|
|
339
|
+
self.table.add_table_comment(comment=comment)
|
|
340
340
|
|
|
341
341
|
else:
|
|
342
|
-
DEFAULT_LOGGER.debug("table exists,
|
|
342
|
+
DEFAULT_LOGGER.debug("table already exists, skipped creation", extra={"label": self})
|
|
343
343
|
|
|
344
344
|
def _update_schema(
|
|
345
345
|
self,
|
|
@@ -389,6 +389,24 @@ class Generator(Configurator):
|
|
|
389
389
|
def overwrite_schema(self, df: Optional[DataFrame] = None):
|
|
390
390
|
self._update_schema(df=df, overwrite=True)
|
|
391
391
|
|
|
392
|
+
def update_comments(self, table: Optional[bool] = True, columns: Optional[bool] = True):
|
|
393
|
+
if self.virtual:
|
|
394
|
+
return
|
|
395
|
+
|
|
396
|
+
if self.persist:
|
|
397
|
+
self.table.drop_comments()
|
|
398
|
+
|
|
399
|
+
if table:
|
|
400
|
+
comment = self.options.table.get("comment")
|
|
401
|
+
if comment:
|
|
402
|
+
self.table.add_table_comment(comment=comment)
|
|
403
|
+
|
|
404
|
+
if columns:
|
|
405
|
+
comments = self.options.table.get_dict("comments")
|
|
406
|
+
if comments:
|
|
407
|
+
for col, comment in comments.items():
|
|
408
|
+
self.table.add_column_comment(column=col, comment=comment)
|
|
409
|
+
|
|
392
410
|
def get_differences_with_deltatable(self, df: Optional[DataFrame] = None):
|
|
393
411
|
if df is None:
|
|
394
412
|
df = self.get_data(stream=self.stream)
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
from importlib.util import module_from_spec, spec_from_file_location
|
|
2
1
|
from typing import Optional
|
|
3
2
|
|
|
4
3
|
from fabricks.context import PATH_PARSERS
|
|
5
4
|
from fabricks.core.parsers._types import ParserOptions
|
|
6
5
|
from fabricks.core.parsers.base import PARSERS, BaseParser
|
|
6
|
+
from fabricks.utils.helpers import load_module_from_path
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def get_parser(name: str, parser_options: Optional[ParserOptions] = None) -> BaseParser:
|
|
@@ -11,12 +11,7 @@ def get_parser(name: str, parser_options: Optional[ParserOptions] = None) -> Bas
|
|
|
11
11
|
path = PATH_PARSERS.joinpath(name).append(".py")
|
|
12
12
|
assert path.exists(), f"parser not found ({path})"
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
assert spec, f"parser not found ({path})"
|
|
16
|
-
assert spec.loader is not None
|
|
17
|
-
|
|
18
|
-
mod = module_from_spec(spec)
|
|
19
|
-
spec.loader.exec_module(mod)
|
|
14
|
+
load_module_from_path(name, path)
|
|
20
15
|
parser = PARSERS[name](parser_options)
|
|
21
16
|
|
|
22
17
|
else:
|
fabricks/core/steps/base.py
CHANGED
|
@@ -228,15 +228,22 @@ class BaseStep:
|
|
|
228
228
|
DEFAULT_LOGGER.exception("fail to get jobs", extra={"label": self})
|
|
229
229
|
raise e
|
|
230
230
|
|
|
231
|
-
def create_db_objects(
|
|
231
|
+
def create_db_objects(
|
|
232
|
+
self,
|
|
233
|
+
retry: Optional[bool] = True,
|
|
234
|
+
update_lists: Optional[bool] = True,
|
|
235
|
+
incremental: Optional[bool] = False,
|
|
236
|
+
) -> List[Dict]:
|
|
232
237
|
DEFAULT_LOGGER.info("create db objects", extra={"label": self})
|
|
233
238
|
|
|
234
239
|
df = self.get_jobs()
|
|
235
|
-
table_df = self.database.get_tables()
|
|
236
|
-
view_df = self.database.get_views()
|
|
237
240
|
|
|
238
|
-
|
|
239
|
-
|
|
241
|
+
if incremental:
|
|
242
|
+
table_df = self.database.get_tables()
|
|
243
|
+
view_df = self.database.get_views()
|
|
244
|
+
|
|
245
|
+
df = df.join(table_df, "job_id", how="left_anti")
|
|
246
|
+
df = df.join(view_df, "job_id", how="left_anti")
|
|
240
247
|
|
|
241
248
|
if df:
|
|
242
249
|
results = run_in_parallel(
|
|
@@ -248,15 +255,16 @@ class BaseStep:
|
|
|
248
255
|
loglevel=logging.CRITICAL,
|
|
249
256
|
)
|
|
250
257
|
|
|
251
|
-
|
|
252
|
-
|
|
258
|
+
if update_lists:
|
|
259
|
+
self.update_tables_list()
|
|
260
|
+
self.update_views_list()
|
|
253
261
|
|
|
254
262
|
errors = [res for res in results if res.get("error")]
|
|
255
263
|
|
|
256
264
|
if errors:
|
|
257
265
|
if retry:
|
|
258
266
|
DEFAULT_LOGGER.warning("retry to create jobs", extra={"label": self})
|
|
259
|
-
return self.create_db_objects(retry=False)
|
|
267
|
+
return self.create_db_objects(retry=False, update_lists=update_lists, incremental=incremental)
|
|
260
268
|
|
|
261
269
|
return errors
|
|
262
270
|
|
fabricks/deploy/views.py
CHANGED
|
@@ -276,6 +276,7 @@ def create_or_replace_logs_pivot_view():
|
|
|
276
276
|
l.step,
|
|
277
277
|
l.job,
|
|
278
278
|
l.job_id,
|
|
279
|
+
-- flags
|
|
279
280
|
collect_set(l.status) as statuses,
|
|
280
281
|
array_contains(statuses, 'skipped') as skipped,
|
|
281
282
|
array_contains(statuses, 'warned') as warned,
|
|
@@ -283,15 +284,19 @@ def create_or_replace_logs_pivot_view():
|
|
|
283
284
|
array_contains(statuses, 'failed') or (not done and not skipped) as failed,
|
|
284
285
|
not done and not failed and not skipped and array_contains(statuses, 'running') as timed_out,
|
|
285
286
|
not array_contains(statuses, 'running') as cancelled,
|
|
287
|
+
--
|
|
286
288
|
max(l.notebook_id) as notebook_id,
|
|
289
|
+
--
|
|
290
|
+
max(l.timestamp) filter (where l.status = 'running') as start_time,
|
|
291
|
+
max(l.timestamp) filter (where l.status in ('done', 'ok')) as end_time,
|
|
292
|
+
--
|
|
287
293
|
max(l.timestamp) filter (where l.status = 'scheduled' ) as scheduled_time,
|
|
288
294
|
max(l.timestamp) filter (where l.status = 'waiting' ) as waiting_time,
|
|
289
|
-
max(l.timestamp) filter (where l.status = 'running') as start_time,
|
|
290
295
|
max(l.timestamp) filter (where l.status = 'running' ) as running_time,
|
|
291
296
|
max(l.timestamp) filter (where l.status = 'done' ) as done_time,
|
|
292
297
|
max(l.timestamp) filter (where l.status = 'failed' ) as failed_time,
|
|
293
|
-
max(l.timestamp) filter(where l.status = 'ok') as
|
|
294
|
-
|
|
298
|
+
max(l.timestamp) filter (where l.status = 'ok') as ok_time,
|
|
299
|
+
--
|
|
295
300
|
max(l.exception) as exception
|
|
296
301
|
from
|
|
297
302
|
fabricks.logs l
|
fabricks/metastore/table.py
CHANGED
|
@@ -666,7 +666,45 @@ class Table(DbObject):
|
|
|
666
666
|
"""
|
|
667
667
|
)
|
|
668
668
|
|
|
669
|
-
def
|
|
669
|
+
def drop_comments(self):
|
|
670
|
+
self.drop_table_comment()
|
|
671
|
+
for col in self.columns:
|
|
672
|
+
self.drop_column_comment(col)
|
|
673
|
+
|
|
674
|
+
def drop_table_comment(self):
|
|
675
|
+
assert self.registered, f"{self} not registered"
|
|
676
|
+
|
|
677
|
+
DEFAULT_LOGGER.debug("drop table comment", extra={"label": self})
|
|
678
|
+
self.spark.sql(
|
|
679
|
+
f"""
|
|
680
|
+
comment on table {self.qualified_name}
|
|
681
|
+
is null;
|
|
682
|
+
"""
|
|
683
|
+
)
|
|
684
|
+
|
|
685
|
+
def drop_column_comment(self, column: str):
|
|
686
|
+
assert self.registered, f"{self} not registered"
|
|
687
|
+
|
|
688
|
+
DEFAULT_LOGGER.debug(f"drop comment from column {column}", extra={"label": self})
|
|
689
|
+
self.spark.sql(
|
|
690
|
+
f"""
|
|
691
|
+
comment on column {self.qualified_name}.`{column}`
|
|
692
|
+
is null;
|
|
693
|
+
"""
|
|
694
|
+
)
|
|
695
|
+
|
|
696
|
+
def add_column_comment(self, column: str, comment: str):
|
|
697
|
+
assert self.registered, f"{self} not registered"
|
|
698
|
+
|
|
699
|
+
DEFAULT_LOGGER.debug(f"add comment '{comment}' to column {column}", extra={"label": self})
|
|
700
|
+
self.spark.sql(
|
|
701
|
+
f"""
|
|
702
|
+
comment on column {self.qualified_name}.`{column}`
|
|
703
|
+
is '{comment}';
|
|
704
|
+
"""
|
|
705
|
+
)
|
|
706
|
+
|
|
707
|
+
def add_table_comment(self, comment: str):
|
|
670
708
|
assert self.registered, f"{self} not registered"
|
|
671
709
|
|
|
672
710
|
DEFAULT_LOGGER.debug(f"add comment '{comment}'", extra={"label": self})
|
fabricks/utils/azure_table.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import
|
|
2
|
-
from typing import TYPE_CHECKING, List, Optional, Union
|
|
1
|
+
from typing import TYPE_CHECKING, Any, List, Optional, Union
|
|
3
2
|
|
|
4
3
|
from azure.data.tables import TableClient, TableServiceClient
|
|
5
4
|
from pyspark.sql import DataFrame
|
|
@@ -99,27 +98,29 @@ class AzureTable:
|
|
|
99
98
|
if self._table_client is not None:
|
|
100
99
|
self._table_client.close()
|
|
101
100
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
self.
|
|
101
|
+
@retry(
|
|
102
|
+
stop=stop_after_attempt(3),
|
|
103
|
+
wait=wait_exponential(multiplier=1, min=1, max=10),
|
|
104
|
+
retry=retry_if_exception_type((Exception)),
|
|
105
|
+
reraise=True,
|
|
106
|
+
)
|
|
107
|
+
def _submit_with_retry(self, data: Any):
|
|
108
|
+
self.table.submit_transaction(data)
|
|
109
|
+
|
|
110
|
+
def submit(self, operations: List):
|
|
111
|
+
partitions = set()
|
|
112
|
+
for d in operations:
|
|
113
|
+
partitions.add(d[1]["PartitionKey"])
|
|
114
|
+
|
|
115
|
+
for p in partitions:
|
|
116
|
+
_operations = [d for d in operations if d[1].get("PartitionKey") == p]
|
|
117
|
+
t = 50
|
|
118
|
+
if len(_operations) < t:
|
|
119
|
+
self._submit_with_retry(_operations)
|
|
121
120
|
else:
|
|
122
|
-
|
|
121
|
+
transactions = [_operations[i : i + t] for i in range(0, len(_operations), t)]
|
|
122
|
+
for transaction in transactions:
|
|
123
|
+
self._submit_with_retry(transaction)
|
|
123
124
|
|
|
124
125
|
def delete(self, data: Union[List, DataFrame, dict]):
|
|
125
126
|
if isinstance(data, DataFrameLike):
|
fabricks/utils/helpers.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import sys
|
|
2
3
|
from functools import reduce
|
|
3
4
|
from queue import Queue
|
|
4
5
|
from typing import Any, Callable, Iterable, List, Literal, Optional, Union
|
|
@@ -216,13 +217,28 @@ def run_notebook(path: Path, timeout: Optional[int] = None, **kwargs):
|
|
|
216
217
|
dbutils.notebook.run(path.get_notebook_path(), timeout, {**kwargs}) # type: ignore
|
|
217
218
|
|
|
218
219
|
|
|
219
|
-
def xxhash64(s: Any):
|
|
220
|
+
def xxhash64(s: Any) -> int:
|
|
220
221
|
df = spark.sql(f"select xxhash64(cast('{s}' as string)) as xxhash64")
|
|
221
222
|
return df.collect()[0][0]
|
|
222
223
|
|
|
223
224
|
|
|
224
|
-
def md5(s: Any):
|
|
225
|
+
def md5(s: Any) -> str:
|
|
225
226
|
from hashlib import md5
|
|
226
227
|
|
|
227
228
|
md5 = md5(str(s).encode())
|
|
228
229
|
return md5.hexdigest()
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def load_module_from_path(name: str, path: Path):
|
|
233
|
+
from importlib.util import module_from_spec, spec_from_file_location
|
|
234
|
+
|
|
235
|
+
sys.path.append(str(path.parent))
|
|
236
|
+
|
|
237
|
+
spec = spec_from_file_location(name, path.string)
|
|
238
|
+
assert spec, f"no valid module found in {path.string}"
|
|
239
|
+
assert spec.loader is not None
|
|
240
|
+
|
|
241
|
+
textwrap_module = module_from_spec(spec)
|
|
242
|
+
spec.loader.exec_module(textwrap_module)
|
|
243
|
+
|
|
244
|
+
return textwrap_module
|
fabricks/utils/path.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import posixpath
|
|
2
3
|
from pathlib import Path as PathlibPath
|
|
3
4
|
from typing import List, Optional, Union
|
|
4
5
|
|
|
@@ -120,8 +121,13 @@ class Path:
|
|
|
120
121
|
return False
|
|
121
122
|
|
|
122
123
|
def joinpath(self, *other):
|
|
123
|
-
|
|
124
|
-
|
|
124
|
+
parts = [str(o) for o in other]
|
|
125
|
+
base = self.string
|
|
126
|
+
|
|
127
|
+
joined = posixpath.join(base, *parts)
|
|
128
|
+
new = posixpath.normpath(joined)
|
|
129
|
+
|
|
130
|
+
return Path(path=new, assume_git=self.assume_git)
|
|
125
131
|
|
|
126
132
|
def append(self, other: str):
|
|
127
133
|
new_path = self.string + other
|
|
@@ -40,8 +40,8 @@ fabricks/cdc/base/_types.py,sha256=WloCDC3ATrn0aZJ6E8BRYKZx19N3EE56r6qlBYhcuvQ,2
|
|
|
40
40
|
fabricks/cdc/base/cdc.py,sha256=9w5BqQxSVbFVEozJWmZQThqdppkE_SYi4fHSzJ7WMvA,78
|
|
41
41
|
fabricks/cdc/base/configurator.py,sha256=w6Ywif87iv1WG-5OM3XkzIRrsns-_QQ6XlADpk0YLlw,6434
|
|
42
42
|
fabricks/cdc/base/generator.py,sha256=pa_GJn7Pdi5vMnXN8zExmOPMpCqdZ3QoxHEB0wv0lsk,5933
|
|
43
|
-
fabricks/cdc/base/merger.py,sha256=
|
|
44
|
-
fabricks/cdc/base/processor.py,sha256=
|
|
43
|
+
fabricks/cdc/base/merger.py,sha256=3qUUs0uqmwOMdXc50kV3Zo9omuQuUUFgtMLBrg4E-wk,4159
|
|
44
|
+
fabricks/cdc/base/processor.py,sha256=gL3pWMaBRsc0oB93ISnH2x07WbmtM_QEIx8qrUcUoZ0,17704
|
|
45
45
|
fabricks/cdc/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
46
|
fabricks/cdc/templates/filter.sql.jinja,sha256=AQcOj4KRwKscVG3zepnEAN9Yxb03AM7068hqW7dtVI8,236
|
|
47
47
|
fabricks/cdc/templates/merge.sql.jinja,sha256=YS9wWckCVsUI1pUYiRSFBIuQ16WU3EPWSkhZVy2niBA,221
|
|
@@ -69,15 +69,15 @@ fabricks/cdc/templates/queries/nocdc/complete.sql.jinja,sha256=cVKKCSbiuuw1K7BOz
|
|
|
69
69
|
fabricks/cdc/templates/queries/nocdc/update.sql.jinja,sha256=mjNUwGVhZ08yUkdv9sCTkqyW60p0YavtWTqvSUVrwjA,1283
|
|
70
70
|
fabricks/context/__init__.py,sha256=qfntJ9O6omzY_t6AhDP6Ndu9C5LMiVdWbo6ikhtoe7o,1446
|
|
71
71
|
fabricks/context/_types.py,sha256=FzQJ35vp0uc6pAq18bc-VHwMVEWtd0VDdm8xQmNr2Sg,2681
|
|
72
|
+
fabricks/context/config.py,sha256=EmLUnswuWfrncaNJMDjvdMg-1lD8aneKAY8IDna7VPE,4814
|
|
73
|
+
fabricks/context/helpers.py,sha256=nyHF0yhTJ_cDQT5Q47z0CYh4QHOJyzyRNRBfrUYDIbU,1552
|
|
72
74
|
fabricks/context/log.py,sha256=CadrRf8iL6iXlGIGIhEIswa7wGqC-E-oLwWcGTyJ10s,2074
|
|
73
|
-
fabricks/context/runtime.py,sha256=
|
|
75
|
+
fabricks/context/runtime.py,sha256=87PtX6SqLoFd0PGxgisF6dLlxtCHaHxkMMIt34UyB2w,3479
|
|
74
76
|
fabricks/context/secret.py,sha256=iRM-KU-JcJAEOLoGJ8S4Oh65-yt674W6CDTSkOE7SXw,3192
|
|
75
77
|
fabricks/context/spark_session.py,sha256=BPaxKJXHZDI5oQiOPhmua_xjXnrVgluh--AVpvUgbck,2553
|
|
76
78
|
fabricks/context/utils.py,sha256=EQRscdUhdjwk2htZu8gCgNZ9PfRzzrR6e1kRrIbVlBM,2786
|
|
77
|
-
fabricks/context/config/__init__.py,sha256=pFEsGXBQkX5_FP0cwQMX427j6dQuTx81NR9snMxc8cU,3127
|
|
78
|
-
fabricks/context/config/utils.py,sha256=7KCTUiSbqQnDD5mbCO9_o1KbUgD-Xbei_UGgpMQi9nE,1371
|
|
79
79
|
fabricks/core/__init__.py,sha256=LaqDi4xuyHAoLOvS44PQdZdRfq9SmVr7mB6BDHyxYpc,209
|
|
80
|
-
fabricks/core/extenders.py,sha256=
|
|
80
|
+
fabricks/core/extenders.py,sha256=oJzfv0hWxusnGmrjMwbrGyKfot8xzA4XtNquPWfFgPo,727
|
|
81
81
|
fabricks/core/job_schema.py,sha256=6-70oy0ZJd3V9AiXfc0Q8b8NVEynxQza_h7mB13uB-s,853
|
|
82
82
|
fabricks/core/masks.py,sha256=3UCxcCi-TgFHB7xT5ZvmEa8RMKev23X_JLE70Pr3rpY,1347
|
|
83
83
|
fabricks/core/udfs.py,sha256=gu7K-ohxcO0TdgA7IjzVMOZatZQYhFTklHo60a6k_Yc,2960
|
|
@@ -86,7 +86,7 @@ fabricks/core/dags/__init__.py,sha256=0DUKzVcXcROvxkN19P_kaOJ7da5BAM7Vt8EGQbp2KS
|
|
|
86
86
|
fabricks/core/dags/base.py,sha256=tFj27SqeZUZ7pB_LOWkpdowZz5gj30JUANI4gWK3Pl8,3139
|
|
87
87
|
fabricks/core/dags/generator.py,sha256=4fp_CRsWnl_UauM9Jx-E4UCaxnm2_Q5103J58fRws2U,4832
|
|
88
88
|
fabricks/core/dags/log.py,sha256=v1xfpQGfddHDz9lflvXOWTXMde3CdERo9jzeSmNDRhY,402
|
|
89
|
-
fabricks/core/dags/processor.py,sha256=
|
|
89
|
+
fabricks/core/dags/processor.py,sha256=IzjqrpNu6lTYp-Rl2T_8Sb5N0pSk9BhdP9vuL4sBRMg,7930
|
|
90
90
|
fabricks/core/dags/run.py,sha256=RIDULb9WakObSyYzmkglh8RwFRwC8-NFC-1yPDMkBC0,1074
|
|
91
91
|
fabricks/core/dags/terminator.py,sha256=Y6pV2UnSyrCIx2AQWJXoHk5Roq12gZqpennHx_Lbnzs,793
|
|
92
92
|
fabricks/core/dags/utils.py,sha256=4kyobLGl4tO0Flo6LxNzYjCU_G42vns1LrkxTO5_KLY,1585
|
|
@@ -105,7 +105,7 @@ fabricks/core/jobs/base/_types.py,sha256=y66BtJlJskq7wGzn7te5XYjO-NEqeQGUC11kkbe
|
|
|
105
105
|
fabricks/core/jobs/base/checker.py,sha256=Cdfh8rQYy4MvMFl0HyC3alGUWm8zrFXk08m2t2JMu6Y,5477
|
|
106
106
|
fabricks/core/jobs/base/configurator.py,sha256=9G5F7Qg5FWHPbHgdh8Qxc85OoSX0rnjD4c9itwU5KKc,10415
|
|
107
107
|
fabricks/core/jobs/base/exception.py,sha256=HrdxEuOfK5rY-ItZvEL3iywLgdpYUpmWFkjjjks7oYc,2318
|
|
108
|
-
fabricks/core/jobs/base/generator.py,sha256=
|
|
108
|
+
fabricks/core/jobs/base/generator.py,sha256=Dk82tj21NhR9wwgXzMp8JlKQ6D9HnjVlK9fvDmoYLbk,17646
|
|
109
109
|
fabricks/core/jobs/base/invoker.py,sha256=FvjfpNqi542slxC2yLu1BIu5EklNUWySxDF8cD_SqKQ,7602
|
|
110
110
|
fabricks/core/jobs/base/job.py,sha256=dWmk2PpQH2NETaaDS6KoiefRnDHfDMdCyhmogkdcSFI,93
|
|
111
111
|
fabricks/core/jobs/base/processor.py,sha256=qkNiJSSLaEnivKGBcd9UZyIVFexnv-n1p_5mCZIy1rA,9076
|
|
@@ -113,7 +113,7 @@ fabricks/core/parsers/__init__.py,sha256=TGjyUeiiTkJrAxIpu2D_c2rQcbe5YRpmBW9oh0F
|
|
|
113
113
|
fabricks/core/parsers/_types.py,sha256=JC2Oh-wUvaX8SBzeuf5owPgRaj-Q3-7MXxyIYPQ7QwA,147
|
|
114
114
|
fabricks/core/parsers/base.py,sha256=P8IrLQKGakwaAQ-4gf4vElVwWoSpkixYd9kNthu1VDM,3292
|
|
115
115
|
fabricks/core/parsers/decorator.py,sha256=kn_Mj-JLWTFaRiciZ3KavmSUcWFPY3ve-buMruHrX_Q,307
|
|
116
|
-
fabricks/core/parsers/get_parser.py,sha256=
|
|
116
|
+
fabricks/core/parsers/get_parser.py,sha256=mauofS626h9wpPZtlZFqIb1jcKM3Jz4D_36uvd-Lv4k,717
|
|
117
117
|
fabricks/core/parsers/utils.py,sha256=qdn2ElpqBgDsW55-tACWZaFOT0ebrBYg2fenqSgd6YI,2456
|
|
118
118
|
fabricks/core/schedules/__init__.py,sha256=bDjNMcm7itimAo4gun0W4W9bZKwZmWUjkMqAQIcqI2Y,431
|
|
119
119
|
fabricks/core/schedules/diagrams.py,sha256=YA4T7Etl_UPfW-3IGFq5Xj9OlXZGQ27Aot6RVa3ZUgg,578
|
|
@@ -126,7 +126,7 @@ fabricks/core/schedules/terminate.py,sha256=-RvtOrxTOZl2sZQ6KfNHJL6H2LCAEMSVRyyl
|
|
|
126
126
|
fabricks/core/schedules/views.py,sha256=8hYwPLCvvN-nem2lNAKvUY5hC71v88z4-y8j0poUApM,1949
|
|
127
127
|
fabricks/core/steps/__init__.py,sha256=JP-kaDa890-9XqBSPp6YdssAexdxv-MqQ__WfVYdgeg,132
|
|
128
128
|
fabricks/core/steps/_types.py,sha256=VxIrH3nFwmPlwG-UI8sDDP0AwK_9jlsy6yQp6YfgtqE,90
|
|
129
|
-
fabricks/core/steps/base.py,sha256=
|
|
129
|
+
fabricks/core/steps/base.py,sha256=MJe2q9s1siM89YkpHDqldtbtKQgkhDB_cFa2-e_irvs,14642
|
|
130
130
|
fabricks/core/steps/get_step.py,sha256=8q4rEDdTTZNJsXB2l5XY-Ktoow8ZHsON_tx5yKMUIzg,284
|
|
131
131
|
fabricks/core/steps/get_step_conf.py,sha256=UPT3gB1Sh5yzawZ9qiVQlvVAKaxPX82gaWBDzxx75EM,633
|
|
132
132
|
fabricks/deploy/__init__.py,sha256=ntxtFnzeho_WneVoL5CCqbI4rKApKgdmi9j0HKb0LJc,2375
|
|
@@ -136,26 +136,26 @@ fabricks/deploy/schedules.py,sha256=0a5dU1rW6fg8aAp7TTt-l0DgR-4kmzsX2xxV2C30yaw,
|
|
|
136
136
|
fabricks/deploy/tables.py,sha256=IF822oxOCy12r08Dz54YUK5luud6dtTPxJ4TUIHE-No,2621
|
|
137
137
|
fabricks/deploy/udfs.py,sha256=7fw3O5LgOOxDEhuS3s1yFdqybgFh65r_1IdfZUYeejs,597
|
|
138
138
|
fabricks/deploy/utils.py,sha256=V41r1zVT9KcsICqTLAzpb4ixRk2q2ybJMrGhkPOtG6k,5099
|
|
139
|
-
fabricks/deploy/views.py,sha256=
|
|
139
|
+
fabricks/deploy/views.py,sha256=8cSt6IzZy-JHHkyqd91NT2hi3LTNTOolimlfSBXMCvU,14434
|
|
140
140
|
fabricks/metastore/README.md,sha256=utPUGAxmjyNMGe43GfL0Gup4MjeTKKwyiUoNVSfMquI,51
|
|
141
141
|
fabricks/metastore/__init__.py,sha256=RhjY2CuqtZBg8fEizzzvW8qszqCM-vSCL1tQGuzoato,174
|
|
142
142
|
fabricks/metastore/_types.py,sha256=NXYxwQHP0sCllM0N6QBbaK4CdtM_m_rHFDxRNRfBcLU,1919
|
|
143
143
|
fabricks/metastore/database.py,sha256=23VAKKzjrwlEaj28DNNmiOhcfdKRzYk8eEfq-PzINbg,1924
|
|
144
144
|
fabricks/metastore/dbobject.py,sha256=ve8p48OqEpJYsqWNhgesGSE0emM--uY8QrvBRoR3j3g,1881
|
|
145
145
|
fabricks/metastore/pyproject.toml,sha256=6RZM9RMKMDF_EAequhORZ7TD0BQNk7aBCTWAv-sRcp0,519
|
|
146
|
-
fabricks/metastore/table.py,sha256=
|
|
146
|
+
fabricks/metastore/table.py,sha256=AaoNL-1mz4A0CCb3tH_0BUurYPjA1oL5pioCYlEMtu4,29113
|
|
147
147
|
fabricks/metastore/utils.py,sha256=8SxhjDkz_aSH4IGUusel7hqOQxP9U8PNBCY0M7GH00Y,1355
|
|
148
148
|
fabricks/metastore/view.py,sha256=f7hKJWtnH1KmZym8dkoucKOTndntzai_f2YqferxHLs,1431
|
|
149
149
|
fabricks/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
150
150
|
fabricks/utils/_types.py,sha256=AuOhknlozqx5QdAdvZSA6xAWhU8k4nxG1vxIkOVgHeY,184
|
|
151
151
|
fabricks/utils/azure_queue.py,sha256=wtKAq_MD5QLxelerDO475dzL-SySIrxt9d5KGi-8vvw,3102
|
|
152
|
-
fabricks/utils/azure_table.py,sha256=
|
|
152
|
+
fabricks/utils/azure_table.py,sha256=J_UAPiCN89rL5FNmwIGg43Z6FSH8evVIWijDEgYJxk4,5294
|
|
153
153
|
fabricks/utils/console.py,sha256=X4lLgL_UxCjoFRx-ZRCwzdBveRGPKlFYZDi6vl7uevQ,1017
|
|
154
154
|
fabricks/utils/fdict.py,sha256=cdnvNBSXKJIDKSdhQGJA4CGv0qLn5IVYKQ111l7nM9I,7978
|
|
155
|
-
fabricks/utils/helpers.py,sha256=
|
|
155
|
+
fabricks/utils/helpers.py,sha256=fKv6mpT-428xTSjdLfm7TnN1Xo9FadrSIY1qzYgWCzs,7909
|
|
156
156
|
fabricks/utils/log.py,sha256=LCQEM81PhdojiyLrtEzv1QM__bWbaEhGddyd0IqyGXM,7985
|
|
157
157
|
fabricks/utils/mermaid.py,sha256=XoiVxPaUJS4TC_ybA-e78qFzQkQ46uPf055JiiNDdSg,986
|
|
158
|
-
fabricks/utils/path.py,sha256=
|
|
158
|
+
fabricks/utils/path.py,sha256=ToTTS8QKGsWq8cR8SDE2ocWKx2GsuZ5psPTuAMq9R4s,6813
|
|
159
159
|
fabricks/utils/pip.py,sha256=UHo7NTjFGJNghWBuuDow28xUkZYg2YrlbAP49IxZyXY,1522
|
|
160
160
|
fabricks/utils/pydantic.py,sha256=W0fiDLVMFrrInfQw2s5YPeSEvkN-4k864u3UyPoHaz4,2516
|
|
161
161
|
fabricks/utils/spark.py,sha256=QWVpbGwOvURIVBlR7ygt6NQ9MHUsIDvlquJ65iI8UBI,2007
|
|
@@ -171,6 +171,6 @@ fabricks/utils/schema/get_schema_for_type.py,sha256=5k-R6zCgUAtapQgxT4turcx1IQ-b
|
|
|
171
171
|
fabricks/utils/write/__init__.py,sha256=i0UnZenXj9Aq0b0_aU3s6882vg-Vu_AyKfQhl_dTp-g,200
|
|
172
172
|
fabricks/utils/write/delta.py,sha256=lTQ0CfUhcvn3xTCcT_Ns6PMDBsO5UEfa2S9XpJiLJ9c,1250
|
|
173
173
|
fabricks/utils/write/stream.py,sha256=wQBpAnQtYA6nl79sPKhVM6u5m-66suX7B6VQ6tW4TOs,622
|
|
174
|
-
fabricks-3.0.
|
|
175
|
-
fabricks-3.0.
|
|
176
|
-
fabricks-3.0.
|
|
174
|
+
fabricks-3.0.13.dist-info/METADATA,sha256=H7Yv6dwfPTscFWMQWayOB7t_eURWWYCGxPGzWsV71KQ,798
|
|
175
|
+
fabricks-3.0.13.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
176
|
+
fabricks-3.0.13.dist-info/RECORD,,
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import os
|
|
3
|
-
from typing import Final
|
|
4
|
-
|
|
5
|
-
from fabricks.context.config.utils import get_config_from_file
|
|
6
|
-
from fabricks.utils.path import Path
|
|
7
|
-
from fabricks.utils.spark import spark
|
|
8
|
-
|
|
9
|
-
file_path, file_config = get_config_from_file()
|
|
10
|
-
|
|
11
|
-
runtime = os.environ.get("FABRICKS_RUNTIME", "none")
|
|
12
|
-
runtime = None if runtime.lower() == "none" else runtime
|
|
13
|
-
if runtime is None:
|
|
14
|
-
if runtime := file_config.get("runtime"):
|
|
15
|
-
assert file_path is not None
|
|
16
|
-
runtime = file_path.joinpath(runtime)
|
|
17
|
-
|
|
18
|
-
if runtime is None:
|
|
19
|
-
if file_path is not None:
|
|
20
|
-
runtime = file_path
|
|
21
|
-
else:
|
|
22
|
-
raise ValueError(
|
|
23
|
-
"could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
path_runtime = Path(runtime, assume_git=True)
|
|
27
|
-
PATH_RUNTIME: Final[Path] = path_runtime
|
|
28
|
-
|
|
29
|
-
notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
|
|
30
|
-
notebooks = None if notebooks.lower() == "none" else notebooks
|
|
31
|
-
if notebooks is None:
|
|
32
|
-
if notebooks := file_config.get("notebooks"):
|
|
33
|
-
assert file_path is not None
|
|
34
|
-
notebooks = file_path.joinpath(notebooks)
|
|
35
|
-
|
|
36
|
-
notebooks = notebooks if notebooks else path_runtime.joinpath("notebooks")
|
|
37
|
-
PATH_NOTEBOOKS: Final[Path] = Path(str(notebooks), assume_git=True)
|
|
38
|
-
|
|
39
|
-
is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
|
|
40
|
-
if is_job_config_from_yaml is None:
|
|
41
|
-
assert file_path is not None
|
|
42
|
-
is_job_config_from_yaml = file_config.get("job_config_from_yaml")
|
|
43
|
-
|
|
44
|
-
IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")
|
|
45
|
-
|
|
46
|
-
is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
|
|
47
|
-
if is_debugmode is None:
|
|
48
|
-
is_debugmode = file_config.get("debugmode")
|
|
49
|
-
|
|
50
|
-
IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")
|
|
51
|
-
|
|
52
|
-
is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
|
|
53
|
-
if is_devmode is None:
|
|
54
|
-
is_devmode = file_config.get("devmode")
|
|
55
|
-
|
|
56
|
-
IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")
|
|
57
|
-
|
|
58
|
-
loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
|
|
59
|
-
if loglevel is None:
|
|
60
|
-
loglevel = file_config.get("loglevel")
|
|
61
|
-
|
|
62
|
-
loglevel = loglevel.upper() if loglevel else "INFO"
|
|
63
|
-
if loglevel == "DEBUG":
|
|
64
|
-
_loglevel = logging.DEBUG
|
|
65
|
-
elif loglevel == "INFO":
|
|
66
|
-
_loglevel = logging.INFO
|
|
67
|
-
elif loglevel == "WARNING":
|
|
68
|
-
_loglevel = logging.WARNING
|
|
69
|
-
elif loglevel == "ERROR":
|
|
70
|
-
_loglevel = logging.ERROR
|
|
71
|
-
elif loglevel == "CRITICAL":
|
|
72
|
-
_loglevel = logging.CRITICAL
|
|
73
|
-
else:
|
|
74
|
-
raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")
|
|
75
|
-
|
|
76
|
-
LOGLEVEL = _loglevel
|
|
77
|
-
|
|
78
|
-
path_config = os.environ.get("FABRICKS_CONFIG")
|
|
79
|
-
if path_config is None:
|
|
80
|
-
if path_config := file_config.get("config"):
|
|
81
|
-
assert file_path is not None
|
|
82
|
-
path_config = file_path.joinpath(path_config)
|
|
83
|
-
else:
|
|
84
|
-
path_config = PATH_RUNTIME.joinpath(path_config).string if path_config else None
|
|
85
|
-
|
|
86
|
-
if not path_config:
|
|
87
|
-
path_config = PATH_RUNTIME.joinpath(
|
|
88
|
-
"fabricks",
|
|
89
|
-
f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
|
|
90
|
-
).string
|
|
91
|
-
|
|
92
|
-
PATH_CONFIG: Final[Path] = Path(path_config, assume_git=True)
|
fabricks/context/config/utils.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
def get_config_from_toml():
|
|
2
|
-
import os
|
|
3
|
-
import pathlib
|
|
4
|
-
import sys
|
|
5
|
-
|
|
6
|
-
if sys.version_info >= (3, 11):
|
|
7
|
-
import tomllib
|
|
8
|
-
else:
|
|
9
|
-
import tomli as tomllib # type: ignore
|
|
10
|
-
|
|
11
|
-
path = pathlib.Path(os.getcwd())
|
|
12
|
-
while path is not None and not (path / "pyproject.toml").exists():
|
|
13
|
-
if path == path.parent:
|
|
14
|
-
break
|
|
15
|
-
path = path.parent
|
|
16
|
-
|
|
17
|
-
if (path / "pyproject.toml").exists():
|
|
18
|
-
with open((path / "pyproject.toml"), "rb") as f:
|
|
19
|
-
config = tomllib.load(f)
|
|
20
|
-
return path, config.get("tool", {}).get("fabricks", {})
|
|
21
|
-
|
|
22
|
-
return None, {}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def get_config_from_json():
|
|
26
|
-
import json
|
|
27
|
-
import os
|
|
28
|
-
import pathlib
|
|
29
|
-
|
|
30
|
-
path = pathlib.Path(os.getcwd())
|
|
31
|
-
while path is not None and not (path / "fabricksconfig.json").exists():
|
|
32
|
-
if path == path.parent:
|
|
33
|
-
break
|
|
34
|
-
path = path.parent
|
|
35
|
-
|
|
36
|
-
if (path / "fabricksconfig.json").exists():
|
|
37
|
-
with open((path / "fabricksconfig.json"), "r") as f:
|
|
38
|
-
config = json.load(f)
|
|
39
|
-
return path, config
|
|
40
|
-
|
|
41
|
-
return None, {}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def get_config_from_file():
|
|
45
|
-
json_path, json_config = get_config_from_json()
|
|
46
|
-
if json_config:
|
|
47
|
-
return json_path, json_config
|
|
48
|
-
|
|
49
|
-
pyproject_path, pyproject_config = get_config_from_toml()
|
|
50
|
-
if pyproject_config:
|
|
51
|
-
return pyproject_path, pyproject_config
|
|
52
|
-
|
|
53
|
-
return None, {}
|