fabricks 3.0.11__py3-none-any.whl → 3.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/cdc/base/merger.py +1 -0
- fabricks/cdc/base/processor.py +4 -3
- fabricks/context/config.py +127 -0
- fabricks/context/{config/utils.py → helpers.py} +31 -3
- fabricks/context/runtime.py +18 -42
- fabricks/core/dags/processor.py +3 -1
- fabricks/core/jobs/base/generator.py +19 -1
- fabricks/deploy/views.py +8 -3
- fabricks/metastore/table.py +39 -1
- {fabricks-3.0.11.dist-info → fabricks-3.0.12.dist-info}/METADATA +1 -1
- {fabricks-3.0.11.dist-info → fabricks-3.0.12.dist-info}/RECORD +12 -12
- fabricks/context/config/__init__.py +0 -92
- {fabricks-3.0.11.dist-info → fabricks-3.0.12.dist-info}/WHEEL +0 -0
fabricks/cdc/base/merger.py
CHANGED
fabricks/cdc/base/processor.py
CHANGED
|
@@ -390,11 +390,12 @@ class Processor(Generator):
|
|
|
390
390
|
try:
|
|
391
391
|
sql = template.render(**context)
|
|
392
392
|
if fix:
|
|
393
|
-
DEFAULT_LOGGER.debug("fix context", extra={"label": self, "sql": sql})
|
|
394
393
|
sql = self.fix_sql(sql)
|
|
394
|
+
else:
|
|
395
|
+
DEFAULT_LOGGER.debug("print query", extra={"label": self, "sql": sql})
|
|
395
396
|
|
|
396
397
|
except (Exception, TypeError) as e:
|
|
397
|
-
DEFAULT_LOGGER.exception("fail to
|
|
398
|
+
DEFAULT_LOGGER.exception("fail to render sql query", extra={"label": self, "context": context})
|
|
398
399
|
raise e
|
|
399
400
|
|
|
400
401
|
row = self.spark.sql(sql).collect()[0]
|
|
@@ -425,7 +426,7 @@ class Processor(Generator):
|
|
|
425
426
|
|
|
426
427
|
except (Exception, TypeError) as e:
|
|
427
428
|
DEFAULT_LOGGER.debug("context", extra={"label": self, "context": context})
|
|
428
|
-
DEFAULT_LOGGER.exception("fail to
|
|
429
|
+
DEFAULT_LOGGER.exception("fail to render sql query", extra={"label": self, "context": context})
|
|
429
430
|
raise e
|
|
430
431
|
|
|
431
432
|
return sql
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Final
|
|
4
|
+
|
|
5
|
+
from fabricks.context.helpers import get_config_from_file
|
|
6
|
+
from fabricks.utils.log import get_logger
|
|
7
|
+
from fabricks.utils.path import Path
|
|
8
|
+
from fabricks.utils.spark import spark
|
|
9
|
+
|
|
10
|
+
logger, _ = get_logger("logs", level=logging.DEBUG)
|
|
11
|
+
file_path, file_config, origin = get_config_from_file()
|
|
12
|
+
|
|
13
|
+
if file_path:
|
|
14
|
+
logger.debug(f"found {origin} config ({file_path})", extra={"label": "config"})
|
|
15
|
+
|
|
16
|
+
# path to runtime
|
|
17
|
+
runtime = os.environ.get("FABRICKS_RUNTIME", "none")
|
|
18
|
+
runtime = None if runtime.lower() == "none" else runtime
|
|
19
|
+
if runtime is None:
|
|
20
|
+
if runtime := file_config.get("runtime"):
|
|
21
|
+
assert file_path is not None
|
|
22
|
+
runtime = file_path.joinpath(runtime)
|
|
23
|
+
logger.debug(f"resolve runtime from {origin} file", extra={"label": "config"})
|
|
24
|
+
else:
|
|
25
|
+
logger.debug("resolve runtime from env", extra={"label": "config"})
|
|
26
|
+
|
|
27
|
+
if runtime is None:
|
|
28
|
+
if file_path is not None:
|
|
29
|
+
runtime = file_path
|
|
30
|
+
logger.debug(f"resolve runtime from {origin} file", extra={"label": "config"})
|
|
31
|
+
else:
|
|
32
|
+
raise ValueError(
|
|
33
|
+
"could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
path_runtime = Path(runtime, assume_git=True)
|
|
37
|
+
|
|
38
|
+
# path to config
|
|
39
|
+
config = os.environ.get("FABRICKS_CONFIG")
|
|
40
|
+
if config is None:
|
|
41
|
+
if config := file_config.get("config"):
|
|
42
|
+
assert file_path is not None
|
|
43
|
+
config = file_path.joinpath(config)
|
|
44
|
+
logger.debug(f"resolve config from {origin} file", extra={"label": "config"})
|
|
45
|
+
else:
|
|
46
|
+
logger.debug("resolve config from env", extra={"label": "config"})
|
|
47
|
+
|
|
48
|
+
if config is None:
|
|
49
|
+
logger.debug("resolve config from default path", extra={"label": "config"})
|
|
50
|
+
config = path_runtime.joinpath(
|
|
51
|
+
"fabricks",
|
|
52
|
+
f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
|
|
53
|
+
).string
|
|
54
|
+
|
|
55
|
+
path_config = Path(config, assume_git=True)
|
|
56
|
+
|
|
57
|
+
# path to notebooks
|
|
58
|
+
notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
|
|
59
|
+
notebooks = None if notebooks.lower() == "none" else notebooks
|
|
60
|
+
if notebooks is None:
|
|
61
|
+
if notebooks := file_config.get("notebooks"):
|
|
62
|
+
assert file_path is not None
|
|
63
|
+
notebooks = file_path.joinpath(notebooks)
|
|
64
|
+
logger.debug(f"resolve notebooks from {origin} file", extra={"label": "config"})
|
|
65
|
+
else:
|
|
66
|
+
logger.debug("resolve notebooks from env", extra={"label": "config"})
|
|
67
|
+
|
|
68
|
+
if notebooks is None:
|
|
69
|
+
logger.debug("resolve notebooks from default path", extra={"label": "config"})
|
|
70
|
+
notebooks = path_runtime.joinpath("notebooks")
|
|
71
|
+
|
|
72
|
+
path_notebooks = Path(str(notebooks), assume_git=True)
|
|
73
|
+
|
|
74
|
+
# job config from yaml
|
|
75
|
+
is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
|
|
76
|
+
if is_job_config_from_yaml is None:
|
|
77
|
+
if is_job_config_from_yaml := file_config.get("job_config_from_yaml"):
|
|
78
|
+
logger.debug(f"resolve job_config_from_yaml from {origin} file", extra={"label": "config"})
|
|
79
|
+
else:
|
|
80
|
+
logger.debug("resolve job_config_from_yaml from env", extra={"label": "config"})
|
|
81
|
+
|
|
82
|
+
# debug mode
|
|
83
|
+
is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
|
|
84
|
+
if is_debugmode is None:
|
|
85
|
+
if is_debugmode := file_config.get("debugmode"):
|
|
86
|
+
logger.debug(f"resolve debugmode from {origin} file", extra={"label": "config"})
|
|
87
|
+
else:
|
|
88
|
+
logger.debug("resolve debugmode from env", extra={"label": "config"})
|
|
89
|
+
|
|
90
|
+
# dev mode
|
|
91
|
+
is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
|
|
92
|
+
if is_devmode is None:
|
|
93
|
+
if is_devmode := file_config.get("devmode"):
|
|
94
|
+
logger.debug(f"resolve devmode from {origin} file", extra={"label": "config"})
|
|
95
|
+
else:
|
|
96
|
+
logger.debug("resolve devmode from env", extra={"label": "config"})
|
|
97
|
+
|
|
98
|
+
# log level
|
|
99
|
+
loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
|
|
100
|
+
if loglevel is None:
|
|
101
|
+
if loglevel := file_config.get("loglevel"):
|
|
102
|
+
logger.debug(f"resolve loglevel from {origin} file", extra={"label": "config"})
|
|
103
|
+
else:
|
|
104
|
+
logger.debug("resolve loglevel from env", extra={"label": "config"})
|
|
105
|
+
|
|
106
|
+
loglevel = loglevel.upper() if loglevel else "INFO"
|
|
107
|
+
if loglevel == "DEBUG":
|
|
108
|
+
_loglevel = logging.DEBUG
|
|
109
|
+
elif loglevel == "INFO":
|
|
110
|
+
_loglevel = logging.INFO
|
|
111
|
+
elif loglevel == "WARNING":
|
|
112
|
+
_loglevel = logging.WARNING
|
|
113
|
+
elif loglevel == "ERROR":
|
|
114
|
+
_loglevel = logging.ERROR
|
|
115
|
+
elif loglevel == "CRITICAL":
|
|
116
|
+
_loglevel = logging.CRITICAL
|
|
117
|
+
else:
|
|
118
|
+
raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")
|
|
119
|
+
|
|
120
|
+
# Constants
|
|
121
|
+
PATH_CONFIG: Final[Path] = path_config
|
|
122
|
+
PATH_RUNTIME: Final[Path] = path_runtime
|
|
123
|
+
PATH_NOTEBOOKS: Final[Path] = path_notebooks
|
|
124
|
+
IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")
|
|
125
|
+
IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")
|
|
126
|
+
IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")
|
|
127
|
+
LOGLEVEL: Final[int] = _loglevel
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from fabricks.utils.path import Path
|
|
4
|
+
|
|
5
|
+
|
|
1
6
|
def get_config_from_toml():
|
|
2
7
|
import os
|
|
3
8
|
import pathlib
|
|
@@ -44,10 +49,33 @@ def get_config_from_json():
|
|
|
44
49
|
def get_config_from_file():
|
|
45
50
|
json_path, json_config = get_config_from_json()
|
|
46
51
|
if json_config:
|
|
47
|
-
return json_path, json_config
|
|
52
|
+
return json_path, json_config, "json"
|
|
48
53
|
|
|
49
54
|
pyproject_path, pyproject_config = get_config_from_toml()
|
|
50
55
|
if pyproject_config:
|
|
51
|
-
return pyproject_path, pyproject_config
|
|
56
|
+
return pyproject_path, pyproject_config, "pyproject"
|
|
52
57
|
|
|
53
|
-
return None, {}
|
|
58
|
+
return None, {}, None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_storage_paths(objects: List[dict], variables: dict) -> dict:
|
|
62
|
+
d = {}
|
|
63
|
+
for o in objects:
|
|
64
|
+
if o:
|
|
65
|
+
name = o.get("name")
|
|
66
|
+
assert name
|
|
67
|
+
uri = o.get("path_options", {}).get("storage")
|
|
68
|
+
assert uri
|
|
69
|
+
d[name] = Path.from_uri(uri, regex=variables)
|
|
70
|
+
return d
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_runtime_path(objects: List[dict], root: Path) -> dict:
|
|
74
|
+
d = {}
|
|
75
|
+
for o in objects:
|
|
76
|
+
name = o.get("name")
|
|
77
|
+
assert name
|
|
78
|
+
uri = o.get("path_options", {}).get("runtime")
|
|
79
|
+
assert uri
|
|
80
|
+
d[name] = root.joinpath(uri)
|
|
81
|
+
return d
|
fabricks/context/runtime.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
from typing import Final,
|
|
1
|
+
from typing import Final, Optional
|
|
2
2
|
|
|
3
3
|
import yaml
|
|
4
4
|
|
|
5
|
-
from fabricks.context.config import
|
|
5
|
+
from fabricks.context.config import PATH_CONFIG, PATH_RUNTIME
|
|
6
|
+
from fabricks.context.helpers import get_runtime_path, get_storage_paths
|
|
6
7
|
from fabricks.utils.path import Path
|
|
7
8
|
|
|
8
|
-
with open(str(
|
|
9
|
+
with open(str(PATH_CONFIG)) as f:
|
|
9
10
|
data = yaml.safe_load(f)
|
|
10
11
|
|
|
11
12
|
conf: dict = [d["conf"] for d in data][0]
|
|
@@ -51,67 +52,42 @@ FABRICKS_STORAGE_CREDENTIAL: Final[Optional[str]] = path_options.get("storage_cr
|
|
|
51
52
|
|
|
52
53
|
path_udfs = path_options.get("udfs", "fabricks/udfs")
|
|
53
54
|
assert path_udfs, "path to udfs mandatory"
|
|
54
|
-
PATH_UDFS: Final[Path] =
|
|
55
|
+
PATH_UDFS: Final[Path] = PATH_RUNTIME.joinpath(path_udfs)
|
|
55
56
|
|
|
56
57
|
path_parsers = path_options.get("parsers", "fabricks/parsers")
|
|
57
58
|
assert path_parsers, "path to parsers mandatory"
|
|
58
|
-
PATH_PARSERS: Final[Path] =
|
|
59
|
+
PATH_PARSERS: Final[Path] = PATH_RUNTIME.joinpath(path_parsers)
|
|
59
60
|
|
|
60
61
|
path_extenders = path_options.get("extenders", "fabricks/extenders")
|
|
61
62
|
assert path_extenders, "path to extenders mandatory"
|
|
62
|
-
PATH_EXTENDERS: Final[Path] =
|
|
63
|
+
PATH_EXTENDERS: Final[Path] = PATH_RUNTIME.joinpath(path_extenders)
|
|
63
64
|
|
|
64
65
|
path_views = path_options.get("views", "fabricks/views")
|
|
65
66
|
assert path_views, "path to views mandatory"
|
|
66
|
-
PATH_VIEWS: Final[Path] =
|
|
67
|
+
PATH_VIEWS: Final[Path] = PATH_RUNTIME.joinpath(path_views)
|
|
67
68
|
|
|
68
69
|
path_schedules = path_options.get("schedules", "fabricks/schedules")
|
|
69
70
|
assert path_schedules, "path to schedules mandatory"
|
|
70
|
-
PATH_SCHEDULES: Final[Path] =
|
|
71
|
+
PATH_SCHEDULES: Final[Path] = PATH_RUNTIME.joinpath(path_schedules)
|
|
71
72
|
|
|
72
73
|
path_requirements = path_options.get("requirements", "fabricks/requirements")
|
|
73
74
|
assert path_requirements, "path to requirements mandatory"
|
|
74
|
-
PATH_REQUIREMENTS: Final[Path] =
|
|
75
|
+
PATH_REQUIREMENTS: Final[Path] = PATH_RUNTIME.joinpath(path_requirements)
|
|
75
76
|
|
|
76
77
|
path_masks = path_options.get("masks", "fabricks/masks")
|
|
77
78
|
assert path_masks, "path to masks mandatory"
|
|
78
|
-
PATH_MASKS: Final[Path] =
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def _get_storage_paths(objects: List[dict]) -> dict:
|
|
82
|
-
d = {}
|
|
83
|
-
for o in objects:
|
|
84
|
-
if o:
|
|
85
|
-
name = o.get("name")
|
|
86
|
-
assert name
|
|
87
|
-
uri = o.get("path_options", {}).get("storage")
|
|
88
|
-
assert uri
|
|
89
|
-
d[name] = Path.from_uri(uri, regex=variables)
|
|
90
|
-
return d
|
|
91
|
-
|
|
79
|
+
PATH_MASKS: Final[Path] = PATH_RUNTIME.joinpath(path_masks)
|
|
92
80
|
|
|
93
81
|
PATHS_STORAGE: Final[dict[str, Path]] = {
|
|
94
82
|
"fabricks": FABRICKS_STORAGE,
|
|
95
|
-
**
|
|
96
|
-
**
|
|
97
|
-
**
|
|
98
|
-
**
|
|
83
|
+
**get_storage_paths(BRONZE, variables),
|
|
84
|
+
**get_storage_paths(SILVER, variables),
|
|
85
|
+
**get_storage_paths(GOLD, variables),
|
|
86
|
+
**get_storage_paths(databases, variables),
|
|
99
87
|
}
|
|
100
88
|
|
|
101
|
-
|
|
102
|
-
def _get_runtime_path(objects: List[dict]) -> dict:
|
|
103
|
-
d = {}
|
|
104
|
-
for o in objects:
|
|
105
|
-
name = o.get("name")
|
|
106
|
-
assert name
|
|
107
|
-
uri = o.get("path_options", {}).get("runtime")
|
|
108
|
-
assert uri
|
|
109
|
-
d[name] = path_runtime.joinpath(uri)
|
|
110
|
-
return d
|
|
111
|
-
|
|
112
|
-
|
|
113
89
|
PATHS_RUNTIME: Final[dict[str, Path]] = {
|
|
114
|
-
**
|
|
115
|
-
**
|
|
116
|
-
**
|
|
90
|
+
**get_runtime_path(BRONZE, PATH_RUNTIME),
|
|
91
|
+
**get_runtime_path(SILVER, PATH_RUNTIME),
|
|
92
|
+
**get_runtime_path(GOLD, PATH_RUNTIME),
|
|
117
93
|
}
|
fabricks/core/dags/processor.py
CHANGED
|
@@ -10,7 +10,7 @@ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_ex
|
|
|
10
10
|
|
|
11
11
|
from fabricks.context import PATH_NOTEBOOKS
|
|
12
12
|
from fabricks.core.dags.base import BaseDags
|
|
13
|
-
from fabricks.core.dags.log import LOGGER
|
|
13
|
+
from fabricks.core.dags.log import LOGGER, TABLE_LOG_HANDLER
|
|
14
14
|
from fabricks.core.dags.run import run
|
|
15
15
|
from fabricks.core.jobs.base._types import TStep
|
|
16
16
|
from fabricks.core.steps.get_step import get_step
|
|
@@ -148,7 +148,9 @@ class DagProcessor(BaseDags):
|
|
|
148
148
|
finally:
|
|
149
149
|
j["Status"] = "ok"
|
|
150
150
|
self.table.upsert(j)
|
|
151
|
+
|
|
151
152
|
LOGGER.info("end", extra=self.extra(j))
|
|
153
|
+
TABLE_LOG_HANDLER.flush()
|
|
152
154
|
|
|
153
155
|
dependencies = self.table.query(f"PartitionKey eq 'dependencies' and ParentId eq '{j.get('JobId')}'")
|
|
154
156
|
self.table.delete(dependencies)
|
|
@@ -336,7 +336,7 @@ class Generator(Configurator):
|
|
|
336
336
|
|
|
337
337
|
comment = self.options.table.get("comment")
|
|
338
338
|
if comment:
|
|
339
|
-
self.table.
|
|
339
|
+
self.table.add_table_comment(comment=comment)
|
|
340
340
|
|
|
341
341
|
else:
|
|
342
342
|
DEFAULT_LOGGER.debug("table exists, skip creation", extra={"label": self})
|
|
@@ -389,6 +389,24 @@ class Generator(Configurator):
|
|
|
389
389
|
def overwrite_schema(self, df: Optional[DataFrame] = None):
|
|
390
390
|
self._update_schema(df=df, overwrite=True)
|
|
391
391
|
|
|
392
|
+
def update_comments(self, table: Optional[bool] = True, columns: Optional[bool] = True):
|
|
393
|
+
if self.virtual:
|
|
394
|
+
return
|
|
395
|
+
|
|
396
|
+
if self.persist:
|
|
397
|
+
self.table.drop_comments()
|
|
398
|
+
|
|
399
|
+
if table:
|
|
400
|
+
comment = self.options.table.get("comment")
|
|
401
|
+
if comment:
|
|
402
|
+
self.table.add_table_comment(comment=comment)
|
|
403
|
+
|
|
404
|
+
if columns:
|
|
405
|
+
comments = self.options.table.get_dict("comments")
|
|
406
|
+
if comments:
|
|
407
|
+
for col, comment in comments.items():
|
|
408
|
+
self.table.add_column_comment(column=col, comment=comment)
|
|
409
|
+
|
|
392
410
|
def get_differences_with_deltatable(self, df: Optional[DataFrame] = None):
|
|
393
411
|
if df is None:
|
|
394
412
|
df = self.get_data(stream=self.stream)
|
fabricks/deploy/views.py
CHANGED
|
@@ -276,6 +276,7 @@ def create_or_replace_logs_pivot_view():
|
|
|
276
276
|
l.step,
|
|
277
277
|
l.job,
|
|
278
278
|
l.job_id,
|
|
279
|
+
-- flags
|
|
279
280
|
collect_set(l.status) as statuses,
|
|
280
281
|
array_contains(statuses, 'skipped') as skipped,
|
|
281
282
|
array_contains(statuses, 'warned') as warned,
|
|
@@ -283,15 +284,19 @@ def create_or_replace_logs_pivot_view():
|
|
|
283
284
|
array_contains(statuses, 'failed') or (not done and not skipped) as failed,
|
|
284
285
|
not done and not failed and not skipped and array_contains(statuses, 'running') as timed_out,
|
|
285
286
|
not array_contains(statuses, 'running') as cancelled,
|
|
287
|
+
--
|
|
286
288
|
max(l.notebook_id) as notebook_id,
|
|
289
|
+
--
|
|
290
|
+
max(l.timestamp) filter (where l.status = 'running') as start_time,
|
|
291
|
+
max(l.timestamp) filter (where l.status in ('done', 'ok')) as end_time,
|
|
292
|
+
--
|
|
287
293
|
max(l.timestamp) filter (where l.status = 'scheduled' ) as scheduled_time,
|
|
288
294
|
max(l.timestamp) filter (where l.status = 'waiting' ) as waiting_time,
|
|
289
|
-
max(l.timestamp) filter (where l.status = 'running') as start_time,
|
|
290
295
|
max(l.timestamp) filter (where l.status = 'running' ) as running_time,
|
|
291
296
|
max(l.timestamp) filter (where l.status = 'done' ) as done_time,
|
|
292
297
|
max(l.timestamp) filter (where l.status = 'failed' ) as failed_time,
|
|
293
|
-
max(l.timestamp) filter(where l.status = 'ok') as
|
|
294
|
-
|
|
298
|
+
max(l.timestamp) filter (where l.status = 'ok') as ok_time,
|
|
299
|
+
--
|
|
295
300
|
max(l.exception) as exception
|
|
296
301
|
from
|
|
297
302
|
fabricks.logs l
|
fabricks/metastore/table.py
CHANGED
|
@@ -666,7 +666,45 @@ class Table(DbObject):
|
|
|
666
666
|
"""
|
|
667
667
|
)
|
|
668
668
|
|
|
669
|
-
def
|
|
669
|
+
def drop_comments(self):
|
|
670
|
+
self.drop_table_comment()
|
|
671
|
+
for col in self.columns:
|
|
672
|
+
self.drop_column_comment(col)
|
|
673
|
+
|
|
674
|
+
def drop_table_comment(self):
|
|
675
|
+
assert self.registered, f"{self} not registered"
|
|
676
|
+
|
|
677
|
+
DEFAULT_LOGGER.debug("drop table comment", extra={"label": self})
|
|
678
|
+
self.spark.sql(
|
|
679
|
+
f"""
|
|
680
|
+
comment on table {self.qualified_name}
|
|
681
|
+
is null;
|
|
682
|
+
"""
|
|
683
|
+
)
|
|
684
|
+
|
|
685
|
+
def drop_column_comment(self, column: str):
|
|
686
|
+
assert self.registered, f"{self} not registered"
|
|
687
|
+
|
|
688
|
+
DEFAULT_LOGGER.debug(f"drop comment from column {column}", extra={"label": self})
|
|
689
|
+
self.spark.sql(
|
|
690
|
+
f"""
|
|
691
|
+
comment on column {self.qualified_name}.`{column}`
|
|
692
|
+
is null;
|
|
693
|
+
"""
|
|
694
|
+
)
|
|
695
|
+
|
|
696
|
+
def add_column_comment(self, column: str, comment: str):
|
|
697
|
+
assert self.registered, f"{self} not registered"
|
|
698
|
+
|
|
699
|
+
DEFAULT_LOGGER.debug(f"add comment '{comment}' to column {column}", extra={"label": self})
|
|
700
|
+
self.spark.sql(
|
|
701
|
+
f"""
|
|
702
|
+
comment on column {self.qualified_name}.`{column}`
|
|
703
|
+
is '{comment}';
|
|
704
|
+
"""
|
|
705
|
+
)
|
|
706
|
+
|
|
707
|
+
def add_table_comment(self, comment: str):
|
|
670
708
|
assert self.registered, f"{self} not registered"
|
|
671
709
|
|
|
672
710
|
DEFAULT_LOGGER.debug(f"add comment '{comment}'", extra={"label": self})
|
|
@@ -40,8 +40,8 @@ fabricks/cdc/base/_types.py,sha256=WloCDC3ATrn0aZJ6E8BRYKZx19N3EE56r6qlBYhcuvQ,2
|
|
|
40
40
|
fabricks/cdc/base/cdc.py,sha256=9w5BqQxSVbFVEozJWmZQThqdppkE_SYi4fHSzJ7WMvA,78
|
|
41
41
|
fabricks/cdc/base/configurator.py,sha256=w6Ywif87iv1WG-5OM3XkzIRrsns-_QQ6XlADpk0YLlw,6434
|
|
42
42
|
fabricks/cdc/base/generator.py,sha256=pa_GJn7Pdi5vMnXN8zExmOPMpCqdZ3QoxHEB0wv0lsk,5933
|
|
43
|
-
fabricks/cdc/base/merger.py,sha256=
|
|
44
|
-
fabricks/cdc/base/processor.py,sha256=
|
|
43
|
+
fabricks/cdc/base/merger.py,sha256=3qUUs0uqmwOMdXc50kV3Zo9omuQuUUFgtMLBrg4E-wk,4159
|
|
44
|
+
fabricks/cdc/base/processor.py,sha256=gL3pWMaBRsc0oB93ISnH2x07WbmtM_QEIx8qrUcUoZ0,17704
|
|
45
45
|
fabricks/cdc/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
46
|
fabricks/cdc/templates/filter.sql.jinja,sha256=AQcOj4KRwKscVG3zepnEAN9Yxb03AM7068hqW7dtVI8,236
|
|
47
47
|
fabricks/cdc/templates/merge.sql.jinja,sha256=YS9wWckCVsUI1pUYiRSFBIuQ16WU3EPWSkhZVy2niBA,221
|
|
@@ -69,13 +69,13 @@ fabricks/cdc/templates/queries/nocdc/complete.sql.jinja,sha256=cVKKCSbiuuw1K7BOz
|
|
|
69
69
|
fabricks/cdc/templates/queries/nocdc/update.sql.jinja,sha256=mjNUwGVhZ08yUkdv9sCTkqyW60p0YavtWTqvSUVrwjA,1283
|
|
70
70
|
fabricks/context/__init__.py,sha256=qfntJ9O6omzY_t6AhDP6Ndu9C5LMiVdWbo6ikhtoe7o,1446
|
|
71
71
|
fabricks/context/_types.py,sha256=FzQJ35vp0uc6pAq18bc-VHwMVEWtd0VDdm8xQmNr2Sg,2681
|
|
72
|
+
fabricks/context/config.py,sha256=EmLUnswuWfrncaNJMDjvdMg-1lD8aneKAY8IDna7VPE,4814
|
|
73
|
+
fabricks/context/helpers.py,sha256=GV9MscE8p6CsHbjVC-Qnqtv9VBf893DoxfLa057hFT8,2061
|
|
72
74
|
fabricks/context/log.py,sha256=CadrRf8iL6iXlGIGIhEIswa7wGqC-E-oLwWcGTyJ10s,2074
|
|
73
|
-
fabricks/context/runtime.py,sha256=
|
|
75
|
+
fabricks/context/runtime.py,sha256=87PtX6SqLoFd0PGxgisF6dLlxtCHaHxkMMIt34UyB2w,3479
|
|
74
76
|
fabricks/context/secret.py,sha256=iRM-KU-JcJAEOLoGJ8S4Oh65-yt674W6CDTSkOE7SXw,3192
|
|
75
77
|
fabricks/context/spark_session.py,sha256=BPaxKJXHZDI5oQiOPhmua_xjXnrVgluh--AVpvUgbck,2553
|
|
76
78
|
fabricks/context/utils.py,sha256=EQRscdUhdjwk2htZu8gCgNZ9PfRzzrR6e1kRrIbVlBM,2786
|
|
77
|
-
fabricks/context/config/__init__.py,sha256=pFEsGXBQkX5_FP0cwQMX427j6dQuTx81NR9snMxc8cU,3127
|
|
78
|
-
fabricks/context/config/utils.py,sha256=7KCTUiSbqQnDD5mbCO9_o1KbUgD-Xbei_UGgpMQi9nE,1371
|
|
79
79
|
fabricks/core/__init__.py,sha256=LaqDi4xuyHAoLOvS44PQdZdRfq9SmVr7mB6BDHyxYpc,209
|
|
80
80
|
fabricks/core/extenders.py,sha256=39bSm9QiW4vBAyT659joE-5p_EZiNM4gi8KA3-OgX3E,917
|
|
81
81
|
fabricks/core/job_schema.py,sha256=6-70oy0ZJd3V9AiXfc0Q8b8NVEynxQza_h7mB13uB-s,853
|
|
@@ -86,7 +86,7 @@ fabricks/core/dags/__init__.py,sha256=0DUKzVcXcROvxkN19P_kaOJ7da5BAM7Vt8EGQbp2KS
|
|
|
86
86
|
fabricks/core/dags/base.py,sha256=tFj27SqeZUZ7pB_LOWkpdowZz5gj30JUANI4gWK3Pl8,3139
|
|
87
87
|
fabricks/core/dags/generator.py,sha256=4fp_CRsWnl_UauM9Jx-E4UCaxnm2_Q5103J58fRws2U,4832
|
|
88
88
|
fabricks/core/dags/log.py,sha256=v1xfpQGfddHDz9lflvXOWTXMde3CdERo9jzeSmNDRhY,402
|
|
89
|
-
fabricks/core/dags/processor.py,sha256=
|
|
89
|
+
fabricks/core/dags/processor.py,sha256=IzjqrpNu6lTYp-Rl2T_8Sb5N0pSk9BhdP9vuL4sBRMg,7930
|
|
90
90
|
fabricks/core/dags/run.py,sha256=RIDULb9WakObSyYzmkglh8RwFRwC8-NFC-1yPDMkBC0,1074
|
|
91
91
|
fabricks/core/dags/terminator.py,sha256=Y6pV2UnSyrCIx2AQWJXoHk5Roq12gZqpennHx_Lbnzs,793
|
|
92
92
|
fabricks/core/dags/utils.py,sha256=4kyobLGl4tO0Flo6LxNzYjCU_G42vns1LrkxTO5_KLY,1585
|
|
@@ -105,7 +105,7 @@ fabricks/core/jobs/base/_types.py,sha256=y66BtJlJskq7wGzn7te5XYjO-NEqeQGUC11kkbe
|
|
|
105
105
|
fabricks/core/jobs/base/checker.py,sha256=Cdfh8rQYy4MvMFl0HyC3alGUWm8zrFXk08m2t2JMu6Y,5477
|
|
106
106
|
fabricks/core/jobs/base/configurator.py,sha256=9G5F7Qg5FWHPbHgdh8Qxc85OoSX0rnjD4c9itwU5KKc,10415
|
|
107
107
|
fabricks/core/jobs/base/exception.py,sha256=HrdxEuOfK5rY-ItZvEL3iywLgdpYUpmWFkjjjks7oYc,2318
|
|
108
|
-
fabricks/core/jobs/base/generator.py,sha256=
|
|
108
|
+
fabricks/core/jobs/base/generator.py,sha256=TI4Wy8tlVRIKEczx6qmrIf2ppGwLvhdaatb9pnCvzlI,17635
|
|
109
109
|
fabricks/core/jobs/base/invoker.py,sha256=FvjfpNqi542slxC2yLu1BIu5EklNUWySxDF8cD_SqKQ,7602
|
|
110
110
|
fabricks/core/jobs/base/job.py,sha256=dWmk2PpQH2NETaaDS6KoiefRnDHfDMdCyhmogkdcSFI,93
|
|
111
111
|
fabricks/core/jobs/base/processor.py,sha256=qkNiJSSLaEnivKGBcd9UZyIVFexnv-n1p_5mCZIy1rA,9076
|
|
@@ -136,14 +136,14 @@ fabricks/deploy/schedules.py,sha256=0a5dU1rW6fg8aAp7TTt-l0DgR-4kmzsX2xxV2C30yaw,
|
|
|
136
136
|
fabricks/deploy/tables.py,sha256=IF822oxOCy12r08Dz54YUK5luud6dtTPxJ4TUIHE-No,2621
|
|
137
137
|
fabricks/deploy/udfs.py,sha256=7fw3O5LgOOxDEhuS3s1yFdqybgFh65r_1IdfZUYeejs,597
|
|
138
138
|
fabricks/deploy/utils.py,sha256=V41r1zVT9KcsICqTLAzpb4ixRk2q2ybJMrGhkPOtG6k,5099
|
|
139
|
-
fabricks/deploy/views.py,sha256=
|
|
139
|
+
fabricks/deploy/views.py,sha256=8cSt6IzZy-JHHkyqd91NT2hi3LTNTOolimlfSBXMCvU,14434
|
|
140
140
|
fabricks/metastore/README.md,sha256=utPUGAxmjyNMGe43GfL0Gup4MjeTKKwyiUoNVSfMquI,51
|
|
141
141
|
fabricks/metastore/__init__.py,sha256=RhjY2CuqtZBg8fEizzzvW8qszqCM-vSCL1tQGuzoato,174
|
|
142
142
|
fabricks/metastore/_types.py,sha256=NXYxwQHP0sCllM0N6QBbaK4CdtM_m_rHFDxRNRfBcLU,1919
|
|
143
143
|
fabricks/metastore/database.py,sha256=23VAKKzjrwlEaj28DNNmiOhcfdKRzYk8eEfq-PzINbg,1924
|
|
144
144
|
fabricks/metastore/dbobject.py,sha256=ve8p48OqEpJYsqWNhgesGSE0emM--uY8QrvBRoR3j3g,1881
|
|
145
145
|
fabricks/metastore/pyproject.toml,sha256=6RZM9RMKMDF_EAequhORZ7TD0BQNk7aBCTWAv-sRcp0,519
|
|
146
|
-
fabricks/metastore/table.py,sha256=
|
|
146
|
+
fabricks/metastore/table.py,sha256=AaoNL-1mz4A0CCb3tH_0BUurYPjA1oL5pioCYlEMtu4,29113
|
|
147
147
|
fabricks/metastore/utils.py,sha256=8SxhjDkz_aSH4IGUusel7hqOQxP9U8PNBCY0M7GH00Y,1355
|
|
148
148
|
fabricks/metastore/view.py,sha256=f7hKJWtnH1KmZym8dkoucKOTndntzai_f2YqferxHLs,1431
|
|
149
149
|
fabricks/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -171,6 +171,6 @@ fabricks/utils/schema/get_schema_for_type.py,sha256=5k-R6zCgUAtapQgxT4turcx1IQ-b
|
|
|
171
171
|
fabricks/utils/write/__init__.py,sha256=i0UnZenXj9Aq0b0_aU3s6882vg-Vu_AyKfQhl_dTp-g,200
|
|
172
172
|
fabricks/utils/write/delta.py,sha256=lTQ0CfUhcvn3xTCcT_Ns6PMDBsO5UEfa2S9XpJiLJ9c,1250
|
|
173
173
|
fabricks/utils/write/stream.py,sha256=wQBpAnQtYA6nl79sPKhVM6u5m-66suX7B6VQ6tW4TOs,622
|
|
174
|
-
fabricks-3.0.
|
|
175
|
-
fabricks-3.0.
|
|
176
|
-
fabricks-3.0.
|
|
174
|
+
fabricks-3.0.12.dist-info/METADATA,sha256=LYFVTMlw8ueiyViUZC5ZZHk2LZkpNnVGO2B1ZhYrdTg,798
|
|
175
|
+
fabricks-3.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
176
|
+
fabricks-3.0.12.dist-info/RECORD,,
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import os
|
|
3
|
-
from typing import Final
|
|
4
|
-
|
|
5
|
-
from fabricks.context.config.utils import get_config_from_file
|
|
6
|
-
from fabricks.utils.path import Path
|
|
7
|
-
from fabricks.utils.spark import spark
|
|
8
|
-
|
|
9
|
-
file_path, file_config = get_config_from_file()
|
|
10
|
-
|
|
11
|
-
runtime = os.environ.get("FABRICKS_RUNTIME", "none")
|
|
12
|
-
runtime = None if runtime.lower() == "none" else runtime
|
|
13
|
-
if runtime is None:
|
|
14
|
-
if runtime := file_config.get("runtime"):
|
|
15
|
-
assert file_path is not None
|
|
16
|
-
runtime = file_path.joinpath(runtime)
|
|
17
|
-
|
|
18
|
-
if runtime is None:
|
|
19
|
-
if file_path is not None:
|
|
20
|
-
runtime = file_path
|
|
21
|
-
else:
|
|
22
|
-
raise ValueError(
|
|
23
|
-
"could not resolve runtime (could not find pyproject.toml nor fabricksconfig.json nor FABRICKS_RUNTIME)"
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
path_runtime = Path(runtime, assume_git=True)
|
|
27
|
-
PATH_RUNTIME: Final[Path] = path_runtime
|
|
28
|
-
|
|
29
|
-
notebooks = os.environ.get("FABRICKS_NOTEBOOKS", "none")
|
|
30
|
-
notebooks = None if notebooks.lower() == "none" else notebooks
|
|
31
|
-
if notebooks is None:
|
|
32
|
-
if notebooks := file_config.get("notebooks"):
|
|
33
|
-
assert file_path is not None
|
|
34
|
-
notebooks = file_path.joinpath(notebooks)
|
|
35
|
-
|
|
36
|
-
notebooks = notebooks if notebooks else path_runtime.joinpath("notebooks")
|
|
37
|
-
PATH_NOTEBOOKS: Final[Path] = Path(str(notebooks), assume_git=True)
|
|
38
|
-
|
|
39
|
-
is_job_config_from_yaml = os.environ.get("FABRICKS_IS_JOB_CONFIG_FROM_YAML", None)
|
|
40
|
-
if is_job_config_from_yaml is None:
|
|
41
|
-
assert file_path is not None
|
|
42
|
-
is_job_config_from_yaml = file_config.get("job_config_from_yaml")
|
|
43
|
-
|
|
44
|
-
IS_JOB_CONFIG_FROM_YAML: Final[bool] = str(is_job_config_from_yaml).lower() in ("true", "1", "yes")
|
|
45
|
-
|
|
46
|
-
is_debugmode = os.environ.get("FABRICKS_IS_DEBUGMODE", None)
|
|
47
|
-
if is_debugmode is None:
|
|
48
|
-
is_debugmode = file_config.get("debugmode")
|
|
49
|
-
|
|
50
|
-
IS_DEBUGMODE: Final[bool] = str(is_debugmode).lower() in ("true", "1", "yes")
|
|
51
|
-
|
|
52
|
-
is_devmode = os.environ.get("FABRICKS_IS_DEVMODE", None)
|
|
53
|
-
if is_devmode is None:
|
|
54
|
-
is_devmode = file_config.get("devmode")
|
|
55
|
-
|
|
56
|
-
IS_DEVMODE: Final[bool] = str(is_devmode).lower() in ("true", "1", "yes")
|
|
57
|
-
|
|
58
|
-
loglevel = os.environ.get("FABRICKS_LOGLEVEL", None)
|
|
59
|
-
if loglevel is None:
|
|
60
|
-
loglevel = file_config.get("loglevel")
|
|
61
|
-
|
|
62
|
-
loglevel = loglevel.upper() if loglevel else "INFO"
|
|
63
|
-
if loglevel == "DEBUG":
|
|
64
|
-
_loglevel = logging.DEBUG
|
|
65
|
-
elif loglevel == "INFO":
|
|
66
|
-
_loglevel = logging.INFO
|
|
67
|
-
elif loglevel == "WARNING":
|
|
68
|
-
_loglevel = logging.WARNING
|
|
69
|
-
elif loglevel == "ERROR":
|
|
70
|
-
_loglevel = logging.ERROR
|
|
71
|
-
elif loglevel == "CRITICAL":
|
|
72
|
-
_loglevel = logging.CRITICAL
|
|
73
|
-
else:
|
|
74
|
-
raise ValueError(f"could not resolve {loglevel} (DEBUG, INFO, WARNING, ERROR or CRITICAL)")
|
|
75
|
-
|
|
76
|
-
LOGLEVEL = _loglevel
|
|
77
|
-
|
|
78
|
-
path_config = os.environ.get("FABRICKS_CONFIG")
|
|
79
|
-
if path_config is None:
|
|
80
|
-
if path_config := file_config.get("config"):
|
|
81
|
-
assert file_path is not None
|
|
82
|
-
path_config = file_path.joinpath(path_config)
|
|
83
|
-
else:
|
|
84
|
-
path_config = PATH_RUNTIME.joinpath(path_config).string if path_config else None
|
|
85
|
-
|
|
86
|
-
if not path_config:
|
|
87
|
-
path_config = PATH_RUNTIME.joinpath(
|
|
88
|
-
"fabricks",
|
|
89
|
-
f"conf.{spark.conf.get('spark.databricks.clusterUsageTags.clusterOwnerOrgId')}.yml",
|
|
90
|
-
).string
|
|
91
|
-
|
|
92
|
-
PATH_CONFIG: Final[Path] = Path(path_config, assume_git=True)
|
|
File without changes
|