fabricks 2024.7.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/__init__.py +0 -0
- fabricks/api/__init__.py +7 -0
- fabricks/api/cdc/__init__.py +6 -0
- fabricks/api/cdc/nocdc.py +3 -0
- fabricks/api/cdc/scd1.py +3 -0
- fabricks/api/cdc/scd2.py +3 -0
- fabricks/api/context.py +31 -0
- fabricks/api/core.py +4 -0
- fabricks/api/extenders.py +3 -0
- fabricks/api/log.py +3 -0
- fabricks/api/metastore/__init__.py +10 -0
- fabricks/api/metastore/database.py +3 -0
- fabricks/api/metastore/table.py +3 -0
- fabricks/api/metastore/view.py +6 -0
- fabricks/api/notebooks/__init__.py +0 -0
- fabricks/api/notebooks/cluster.py +6 -0
- fabricks/api/notebooks/deploy/__init__.py +0 -0
- fabricks/api/notebooks/deploy/fabricks.py +147 -0
- fabricks/api/notebooks/deploy/notebooks.py +86 -0
- fabricks/api/notebooks/initialize.py +38 -0
- fabricks/api/notebooks/optimize.py +25 -0
- fabricks/api/notebooks/process.py +50 -0
- fabricks/api/notebooks/run.py +87 -0
- fabricks/api/notebooks/terminate.py +27 -0
- fabricks/api/notebooks/vacuum.py +25 -0
- fabricks/api/parsers.py +3 -0
- fabricks/api/udfs.py +3 -0
- fabricks/api/utils.py +9 -0
- fabricks/cdc/__init__.py +14 -0
- fabricks/cdc/base/__init__.py +4 -0
- fabricks/cdc/base/cdc.py +5 -0
- fabricks/cdc/base/configurator.py +145 -0
- fabricks/cdc/base/generator.py +117 -0
- fabricks/cdc/base/merger.py +107 -0
- fabricks/cdc/base/processor.py +338 -0
- fabricks/cdc/base/types.py +3 -0
- fabricks/cdc/cdc.py +5 -0
- fabricks/cdc/nocdc.py +19 -0
- fabricks/cdc/scd.py +21 -0
- fabricks/cdc/scd1.py +15 -0
- fabricks/cdc/scd2.py +15 -0
- fabricks/cdc/templates/__init__.py +0 -0
- fabricks/cdc/templates/merge/scd1.sql.jinja +72 -0
- fabricks/cdc/templates/merge/scd2.sql.jinja +54 -0
- fabricks/cdc/templates/merge.sql.jinja +2 -0
- fabricks/cdc/templates/query/__init__.py +0 -0
- fabricks/cdc/templates/query/base.sql.jinja +34 -0
- fabricks/cdc/templates/query/context.sql.jinja +95 -0
- fabricks/cdc/templates/query/current.sql.jinja +32 -0
- fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +21 -0
- fabricks/cdc/templates/query/deduplicate_key.sql.jinja +14 -0
- fabricks/cdc/templates/query/filter.sql.jinja +71 -0
- fabricks/cdc/templates/query/final.sql.jinja +1 -0
- fabricks/cdc/templates/query/hash.sql.jinja +1 -0
- fabricks/cdc/templates/query/nocdc.sql.jinja +10 -0
- fabricks/cdc/templates/query/rectify.sql.jinja +120 -0
- fabricks/cdc/templates/query/scd1.sql.jinja +112 -0
- fabricks/cdc/templates/query/scd2.sql.jinja +114 -0
- fabricks/cdc/templates/query.sql.jinja +11 -0
- fabricks/context/__init__.py +51 -0
- fabricks/context/log.py +26 -0
- fabricks/context/runtime.py +143 -0
- fabricks/context/spark.py +43 -0
- fabricks/context/types.py +123 -0
- fabricks/core/__init__.py +4 -0
- fabricks/core/dags/__init__.py +9 -0
- fabricks/core/dags/base.py +72 -0
- fabricks/core/dags/generator.py +154 -0
- fabricks/core/dags/log.py +14 -0
- fabricks/core/dags/processor.py +163 -0
- fabricks/core/dags/terminator.py +26 -0
- fabricks/core/deploy/__init__.py +12 -0
- fabricks/core/deploy/tables.py +76 -0
- fabricks/core/deploy/views.py +417 -0
- fabricks/core/extenders.py +29 -0
- fabricks/core/jobs/__init__.py +20 -0
- fabricks/core/jobs/base/__init__.py +10 -0
- fabricks/core/jobs/base/checker.py +89 -0
- fabricks/core/jobs/base/configurator.py +323 -0
- fabricks/core/jobs/base/error.py +16 -0
- fabricks/core/jobs/base/generator.py +391 -0
- fabricks/core/jobs/base/invoker.py +119 -0
- fabricks/core/jobs/base/job.py +5 -0
- fabricks/core/jobs/base/processor.py +204 -0
- fabricks/core/jobs/base/types.py +191 -0
- fabricks/core/jobs/bronze.py +333 -0
- fabricks/core/jobs/get_job.py +126 -0
- fabricks/core/jobs/get_job_conf.py +115 -0
- fabricks/core/jobs/get_job_id.py +26 -0
- fabricks/core/jobs/get_jobs.py +89 -0
- fabricks/core/jobs/gold.py +218 -0
- fabricks/core/jobs/silver.py +354 -0
- fabricks/core/parsers/__init__.py +12 -0
- fabricks/core/parsers/base.py +91 -0
- fabricks/core/parsers/decorator.py +11 -0
- fabricks/core/parsers/get_parser.py +25 -0
- fabricks/core/parsers/types.py +6 -0
- fabricks/core/schedules.py +89 -0
- fabricks/core/scripts/__init__.py +13 -0
- fabricks/core/scripts/armageddon.py +82 -0
- fabricks/core/scripts/generate.py +20 -0
- fabricks/core/scripts/job_schema.py +28 -0
- fabricks/core/scripts/optimize.py +45 -0
- fabricks/core/scripts/process.py +9 -0
- fabricks/core/scripts/stats.py +48 -0
- fabricks/core/scripts/steps.py +27 -0
- fabricks/core/scripts/terminate.py +6 -0
- fabricks/core/scripts/vacuum.py +45 -0
- fabricks/core/site_packages.py +55 -0
- fabricks/core/steps/__init__.py +4 -0
- fabricks/core/steps/base.py +282 -0
- fabricks/core/steps/get_step.py +10 -0
- fabricks/core/steps/get_step_conf.py +33 -0
- fabricks/core/steps/types.py +7 -0
- fabricks/core/udfs.py +106 -0
- fabricks/core/utils.py +69 -0
- fabricks/core/views.py +36 -0
- fabricks/metastore/README.md +3 -0
- fabricks/metastore/__init__.py +5 -0
- fabricks/metastore/database.py +71 -0
- fabricks/metastore/pyproject.toml +20 -0
- fabricks/metastore/relational.py +61 -0
- fabricks/metastore/table.py +529 -0
- fabricks/metastore/utils.py +35 -0
- fabricks/metastore/view.py +40 -0
- fabricks/utils/README.md +3 -0
- fabricks/utils/__init__.py +0 -0
- fabricks/utils/azure_queue.py +63 -0
- fabricks/utils/azure_table.py +99 -0
- fabricks/utils/console.py +51 -0
- fabricks/utils/container.py +57 -0
- fabricks/utils/fdict.py +28 -0
- fabricks/utils/helpers.py +89 -0
- fabricks/utils/log.py +153 -0
- fabricks/utils/path.py +206 -0
- fabricks/utils/pip.py +61 -0
- fabricks/utils/pydantic.py +92 -0
- fabricks/utils/pyproject.toml +18 -0
- fabricks/utils/read/__init__.py +11 -0
- fabricks/utils/read/read.py +305 -0
- fabricks/utils/read/read_excel.py +5 -0
- fabricks/utils/read/read_yaml.py +43 -0
- fabricks/utils/read/types.py +3 -0
- fabricks/utils/schema/__init__.py +7 -0
- fabricks/utils/schema/get_json_schema_for_type.py +161 -0
- fabricks/utils/schema/get_schema_for_type.py +93 -0
- fabricks/utils/secret.py +78 -0
- fabricks/utils/sqlglot.py +48 -0
- fabricks/utils/write/__init__.py +8 -0
- fabricks/utils/write/delta.py +46 -0
- fabricks/utils/write/stream.py +27 -0
- fabricks-2024.7.1.5.dist-info/METADATA +212 -0
- fabricks-2024.7.1.5.dist-info/RECORD +154 -0
- fabricks-2024.7.1.5.dist-info/WHEEL +4 -0
fabricks/__init__.py
ADDED
|
File without changes
|
fabricks/api/__init__.py
ADDED
fabricks/api/cdc/scd1.py
ADDED
fabricks/api/cdc/scd2.py
ADDED
fabricks/api/context.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from databricks.sdk.runtime import dbutils, spark
|
|
2
|
+
|
|
3
|
+
from fabricks.context import BRONZE, GOLD, SECRET_SCOPE, SILVER
|
|
4
|
+
from fabricks.core.jobs.base.types import Bronzes, Golds, Silvers
|
|
5
|
+
|
|
6
|
+
# spark
|
|
7
|
+
SPARK = spark
|
|
8
|
+
DBUTILS = dbutils
|
|
9
|
+
|
|
10
|
+
# step
|
|
11
|
+
BRONZES = Bronzes
|
|
12
|
+
SILVERS = Silvers
|
|
13
|
+
GOLDS = Golds
|
|
14
|
+
STEPS = BRONZES + SILVERS + GOLDS
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"BRONZE",
|
|
19
|
+
"Bronzes",
|
|
20
|
+
"BRONZES",
|
|
21
|
+
"DBUTILS",
|
|
22
|
+
"GOLD",
|
|
23
|
+
"Golds",
|
|
24
|
+
"GOLDS",
|
|
25
|
+
"SECRET_SCOPE",
|
|
26
|
+
"SILVER",
|
|
27
|
+
"Silvers",
|
|
28
|
+
"SILVERS",
|
|
29
|
+
"SPARK",
|
|
30
|
+
"STEPS",
|
|
31
|
+
]
|
fabricks/api/core.py
ADDED
fabricks/api/log.py
ADDED
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Databricks notebook source
|
|
2
|
+
# MAGIC %pip install python-dotenv
|
|
3
|
+
|
|
4
|
+
# COMMAND ----------
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import subprocess
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from databricks.sdk.runtime import dbutils
|
|
11
|
+
from dotenv import load_dotenv
|
|
12
|
+
|
|
13
|
+
# COMMAND ----------
|
|
14
|
+
|
|
15
|
+
load_dotenv()
|
|
16
|
+
|
|
17
|
+
# COMMAND ----------
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
dbutils.fs.ls("mnt/fabricks")
|
|
21
|
+
except Exception:
|
|
22
|
+
print("fabricks container not mounted")
|
|
23
|
+
|
|
24
|
+
# COMMAND ----------
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
dbutils.fs.ls("mnt/fabricks/versions")
|
|
28
|
+
except Exception:
|
|
29
|
+
print("fabricks not found")
|
|
30
|
+
|
|
31
|
+
# COMMAND ----------
|
|
32
|
+
|
|
33
|
+
runtime = os.environ.get("path_runtime")
|
|
34
|
+
assert runtime
|
|
35
|
+
if not Path(runtime).exists():
|
|
36
|
+
print("runtime not found")
|
|
37
|
+
|
|
38
|
+
# COMMAND ----------
|
|
39
|
+
|
|
40
|
+
notebooks = os.environ.get("path_notebooks")
|
|
41
|
+
assert notebooks
|
|
42
|
+
if not Path(notebooks).exists():
|
|
43
|
+
print("notebooks not found")
|
|
44
|
+
|
|
45
|
+
# COMMAND ----------
|
|
46
|
+
|
|
47
|
+
scripts = os.environ.get("path_scripts")
|
|
48
|
+
assert scripts
|
|
49
|
+
if not Path(scripts).exists():
|
|
50
|
+
print("scripts not found")
|
|
51
|
+
|
|
52
|
+
# COMMAND ----------
|
|
53
|
+
|
|
54
|
+
abfss_wheels = "abfss://fabricks-wheels@bmsstaprdeuwsoftware.dfs.core.windows.net"
|
|
55
|
+
|
|
56
|
+
# COMMAND ----------
|
|
57
|
+
|
|
58
|
+
version = os.environ.get("fabricks_version")
|
|
59
|
+
|
|
60
|
+
# COMMAND ----------
|
|
61
|
+
|
|
62
|
+
mnt_version = f"dbfs:/mnt/fabricks/versions/{version}"
|
|
63
|
+
fuse_mnt_version = f"/dbfs/mnt/fabricks/versions/{version}"
|
|
64
|
+
|
|
65
|
+
# COMMAND ----------
|
|
66
|
+
|
|
67
|
+
try:
|
|
68
|
+
for f in dbutils.fs.ls(mnt_version):
|
|
69
|
+
dbutils.fs.rm(f.path, True)
|
|
70
|
+
except Exception:
|
|
71
|
+
pass
|
|
72
|
+
|
|
73
|
+
dbutils.fs.rm(mnt_version, True)
|
|
74
|
+
dbutils.fs.mkdirs(mnt_version)
|
|
75
|
+
|
|
76
|
+
# COMMAND ----------
|
|
77
|
+
|
|
78
|
+
dbutils.fs.rm(mnt_version, True)
|
|
79
|
+
dbutils.fs.mkdirs(mnt_version)
|
|
80
|
+
|
|
81
|
+
# COMMAND ----------
|
|
82
|
+
|
|
83
|
+
print("copying version to", f"{mnt_version}/version")
|
|
84
|
+
|
|
85
|
+
for f in dbutils.fs.ls(f"{abfss_wheels}/{version}"):
|
|
86
|
+
to = f"{mnt_version}/{f.name}"
|
|
87
|
+
|
|
88
|
+
try:
|
|
89
|
+
dbutils.fs.ls(to)
|
|
90
|
+
except Exception:
|
|
91
|
+
print("uploading", f.name)
|
|
92
|
+
dbutils.fs.cp(f.path, to, recurse=True)
|
|
93
|
+
|
|
94
|
+
# COMMAND ----------
|
|
95
|
+
|
|
96
|
+
print("pip install requirements.txt")
|
|
97
|
+
|
|
98
|
+
out = subprocess.run(
|
|
99
|
+
[
|
|
100
|
+
"pip",
|
|
101
|
+
"install",
|
|
102
|
+
"--no-index",
|
|
103
|
+
f"--find-links={fuse_mnt_version}/wheels",
|
|
104
|
+
"-r",
|
|
105
|
+
f"{fuse_mnt_version}/requirements.txt",
|
|
106
|
+
],
|
|
107
|
+
capture_output=True,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
if out.returncode == 1:
|
|
111
|
+
raise ValueError(out.stderr)
|
|
112
|
+
|
|
113
|
+
# COMMAND ----------
|
|
114
|
+
|
|
115
|
+
latest = os.environ.get("latest")
|
|
116
|
+
assert latest
|
|
117
|
+
latest = latest.lower() == "true"
|
|
118
|
+
|
|
119
|
+
# COMMAND ----------
|
|
120
|
+
|
|
121
|
+
versions = [version, "2023.12.*"] if latest else [version]
|
|
122
|
+
|
|
123
|
+
# COMMAND ----------
|
|
124
|
+
|
|
125
|
+
print("deploy init script")
|
|
126
|
+
|
|
127
|
+
for v in versions:
|
|
128
|
+
path = f"{scripts}/{v}.sh"
|
|
129
|
+
|
|
130
|
+
with open(path, "w") as sh:
|
|
131
|
+
sh.write(
|
|
132
|
+
f"""
|
|
133
|
+
sudo echo FABRICKS_RUNTIME={runtime} >> /etc/environment
|
|
134
|
+
sudo echo FABRICKS_NOTEBOOKS={notebooks}/{version} >> /etc/environment
|
|
135
|
+
sudo echo FABRICKS_VERSION={version} >> /etc/environment
|
|
136
|
+
|
|
137
|
+
/databricks/python/bin/pip install --no-index --find-links='{fuse_mnt_version}/wheels' -r '{fuse_mnt_version}/requirements.txt'
|
|
138
|
+
/databricks/python/bin/pip install sqlglot
|
|
139
|
+
/databricks/python/bin/pip install jinja2
|
|
140
|
+
""".replace(" ", "").strip()
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
# COMMAND ----------
|
|
144
|
+
|
|
145
|
+
dbutils.notebook.exit("exit (0)")
|
|
146
|
+
|
|
147
|
+
# COMMAND ----------
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Databricks notebook source
|
|
2
|
+
# MAGIC %pip install python-dotenv
|
|
3
|
+
|
|
4
|
+
# COMMAND ----------
|
|
5
|
+
|
|
6
|
+
# MAGIC %pip install databricks_cli
|
|
7
|
+
|
|
8
|
+
# COMMAND ----------
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from importlib import resources
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from databricks.sdk.runtime import dbutils
|
|
15
|
+
from databricks_cli.sdk.api_client import ApiClient # type: ignore
|
|
16
|
+
from databricks_cli.workspace.api import WorkspaceApi # type: ignore
|
|
17
|
+
from dotenv import load_dotenv
|
|
18
|
+
|
|
19
|
+
from fabricks.api import notebooks as src
|
|
20
|
+
|
|
21
|
+
# COMMAND ----------
|
|
22
|
+
|
|
23
|
+
load_dotenv()
|
|
24
|
+
|
|
25
|
+
# COMMAND ----------
|
|
26
|
+
|
|
27
|
+
host = os.environ.get("databricks_host")
|
|
28
|
+
token = os.environ.get("databricks_token")
|
|
29
|
+
version = os.environ.get("fabricks_version")
|
|
30
|
+
root = os.environ.get("path_notebooks")
|
|
31
|
+
latest = os.environ.get("latest")
|
|
32
|
+
assert latest
|
|
33
|
+
latest = latest.lower() == "true"
|
|
34
|
+
runtime = os.environ.get("path_runtime")
|
|
35
|
+
|
|
36
|
+
# COMMAND ----------
|
|
37
|
+
|
|
38
|
+
notebooks = ["cluster", "initialize", "job", "log", "optimize", "run", "terminate"]
|
|
39
|
+
|
|
40
|
+
# COMMAND ----------
|
|
41
|
+
|
|
42
|
+
client = ApiClient(host=host, token=token)
|
|
43
|
+
workspace_api = WorkspaceApi(client)
|
|
44
|
+
|
|
45
|
+
# COMMAND ----------
|
|
46
|
+
|
|
47
|
+
versions = [version, "latest"] if latest else [version]
|
|
48
|
+
|
|
49
|
+
# COMMAND ----------
|
|
50
|
+
|
|
51
|
+
for v in versions:
|
|
52
|
+
print(f"deploy {v}")
|
|
53
|
+
|
|
54
|
+
p = f"{root}/{v}"
|
|
55
|
+
if not Path(p).exists():
|
|
56
|
+
os.mkdir(p)
|
|
57
|
+
|
|
58
|
+
for n in notebooks:
|
|
59
|
+
notebook = resources.files(src) / f"{n}.py"
|
|
60
|
+
workspace_api.import_workspace(
|
|
61
|
+
source_path=notebook,
|
|
62
|
+
target_path=f"{p.replace('/Workspace', '')}/{n}.py",
|
|
63
|
+
fmt="AUTO",
|
|
64
|
+
language="PYTHON",
|
|
65
|
+
is_overwrite=True,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# COMMAND ----------
|
|
69
|
+
|
|
70
|
+
if latest:
|
|
71
|
+
p = f"{runtime}/notebooks"
|
|
72
|
+
for n in notebooks:
|
|
73
|
+
notebook = resources.files(src) / f"{n}.py"
|
|
74
|
+
workspace_api.import_workspace(
|
|
75
|
+
source_path=notebook,
|
|
76
|
+
target_path=f"{p.replace('/Workspace', '')}/{n}.py",
|
|
77
|
+
fmt="AUTO",
|
|
78
|
+
language="PYTHON",
|
|
79
|
+
is_overwrite=True,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# COMMAND ----------
|
|
83
|
+
|
|
84
|
+
dbutils.notebook.exit("exit (0)")
|
|
85
|
+
|
|
86
|
+
# COMMAND ----------
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Databricks notebook source
|
|
2
|
+
from databricks.sdk.runtime import dbutils, display
|
|
3
|
+
|
|
4
|
+
from fabricks.core.scripts import generate
|
|
5
|
+
|
|
6
|
+
# COMMAND ----------
|
|
7
|
+
|
|
8
|
+
dbutils.widgets.text("schedule", "---")
|
|
9
|
+
|
|
10
|
+
# COMMAND ----------
|
|
11
|
+
|
|
12
|
+
schedule = dbutils.widgets.get("schedule")
|
|
13
|
+
assert schedule != "---", "no schedule provided"
|
|
14
|
+
|
|
15
|
+
# COMMAND ----------
|
|
16
|
+
|
|
17
|
+
print(schedule)
|
|
18
|
+
|
|
19
|
+
# COMMAND ----------
|
|
20
|
+
|
|
21
|
+
schedule_id, job_df, dependency_df = generate(schedule=schedule)
|
|
22
|
+
|
|
23
|
+
# COMMAND ----------
|
|
24
|
+
|
|
25
|
+
display(job_df)
|
|
26
|
+
|
|
27
|
+
# COMMAND ----------
|
|
28
|
+
|
|
29
|
+
display(dependency_df)
|
|
30
|
+
|
|
31
|
+
# COMMAND ----------
|
|
32
|
+
|
|
33
|
+
dbutils.jobs.taskValues.set(key="schedule_id", value=schedule_id)
|
|
34
|
+
dbutils.jobs.taskValues.set(key="schedule", value=schedule)
|
|
35
|
+
|
|
36
|
+
# COMMAND ----------
|
|
37
|
+
|
|
38
|
+
dbutils.notebook.exit("exit (0)")
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Databricks notebook source
|
|
2
|
+
from databricks.sdk.runtime import dbutils
|
|
3
|
+
from pyspark.errors.exceptions.base import IllegalArgumentException
|
|
4
|
+
|
|
5
|
+
from fabricks.core.scripts import optimize
|
|
6
|
+
|
|
7
|
+
# COMMAND ----------
|
|
8
|
+
|
|
9
|
+
dbutils.widgets.text("schedule_id", "---")
|
|
10
|
+
|
|
11
|
+
# COMMAND ----------
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
|
|
15
|
+
except (TypeError, IllegalArgumentException, ValueError):
|
|
16
|
+
schedule_id = dbutils.widgets.get("schedule_id")
|
|
17
|
+
schedule_id = None if schedule_id == "---" else schedule_id
|
|
18
|
+
|
|
19
|
+
# COMMAND ----------
|
|
20
|
+
|
|
21
|
+
optimize(schedule_id=schedule_id)
|
|
22
|
+
|
|
23
|
+
# COMMAND ----------
|
|
24
|
+
|
|
25
|
+
dbutils.notebook.exit("exit (0)")
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Databricks notebook source
|
|
2
|
+
from databricks.sdk.runtime import dbutils
|
|
3
|
+
from pyspark.errors.exceptions.base import IllegalArgumentException
|
|
4
|
+
|
|
5
|
+
from fabricks.core.scripts import process
|
|
6
|
+
|
|
7
|
+
# COMMAND ----------
|
|
8
|
+
|
|
9
|
+
dbutils.widgets.text("step", "---")
|
|
10
|
+
dbutils.widgets.text("schedule_id", "---")
|
|
11
|
+
|
|
12
|
+
# COMMAND ----------
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
|
|
16
|
+
except (TypeError, IllegalArgumentException, ValueError):
|
|
17
|
+
schedule_id = dbutils.widgets.get("schedule_id")
|
|
18
|
+
assert schedule_id != "---", "no schedule_id provided"
|
|
19
|
+
|
|
20
|
+
assert schedule_id is not None
|
|
21
|
+
|
|
22
|
+
# COMMAND ----------
|
|
23
|
+
|
|
24
|
+
step = dbutils.widgets.get("step")
|
|
25
|
+
assert step != "---", "no step provided"
|
|
26
|
+
|
|
27
|
+
# COMMAND ----------
|
|
28
|
+
|
|
29
|
+
schedule = dbutils.widgets.get("schedule")
|
|
30
|
+
assert schedule != "---", "no schedule provided"
|
|
31
|
+
|
|
32
|
+
# COMMAND ----------
|
|
33
|
+
|
|
34
|
+
print(schedule_id)
|
|
35
|
+
|
|
36
|
+
# COMMAND ----------
|
|
37
|
+
|
|
38
|
+
print(step)
|
|
39
|
+
|
|
40
|
+
# COMMAND ----------
|
|
41
|
+
|
|
42
|
+
print(schedule)
|
|
43
|
+
|
|
44
|
+
# COMMAND ----------
|
|
45
|
+
|
|
46
|
+
process(schedule_id=schedule_id, schedule=schedule, step=step)
|
|
47
|
+
|
|
48
|
+
# COMMAND ----------
|
|
49
|
+
|
|
50
|
+
dbutils.notebook.exit("exit (0)")
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Databricks notebook source
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
from databricks.sdk.runtime import dbutils
|
|
5
|
+
|
|
6
|
+
from fabricks.core.dags.log import DagsLogger, DagsTableLogger
|
|
7
|
+
from fabricks.core.jobs import get_job
|
|
8
|
+
|
|
9
|
+
# COMMAND ----------
|
|
10
|
+
|
|
11
|
+
dbutils.widgets.text("step", "---")
|
|
12
|
+
dbutils.widgets.text("job_id", "---")
|
|
13
|
+
dbutils.widgets.text("job", "--")
|
|
14
|
+
dbutils.widgets.text("schedule_id", "---")
|
|
15
|
+
dbutils.widgets.text("schedule", "---")
|
|
16
|
+
|
|
17
|
+
# COMMAND ----------
|
|
18
|
+
|
|
19
|
+
step = dbutils.widgets.get("step")
|
|
20
|
+
assert step != "---"
|
|
21
|
+
|
|
22
|
+
# COMMAND ----------
|
|
23
|
+
|
|
24
|
+
job_id = dbutils.widgets.get("job_id")
|
|
25
|
+
assert job_id != "---"
|
|
26
|
+
|
|
27
|
+
# COMMAND ----------
|
|
28
|
+
|
|
29
|
+
job = dbutils.widgets.get("job")
|
|
30
|
+
assert job != "---"
|
|
31
|
+
|
|
32
|
+
# COMMAND ----------
|
|
33
|
+
|
|
34
|
+
schedule_id = dbutils.widgets.get("schedule_id")
|
|
35
|
+
assert schedule_id != "---"
|
|
36
|
+
|
|
37
|
+
# COMMAND ----------
|
|
38
|
+
|
|
39
|
+
schedule = dbutils.widgets.get("schedule")
|
|
40
|
+
assert schedule != "---"
|
|
41
|
+
|
|
42
|
+
# COMMAND ----------
|
|
43
|
+
|
|
44
|
+
context = json.loads(dbutils.notebook.entry_point.getDbutils().notebook().getContext().toJson()) # type: ignore
|
|
45
|
+
notebook_id = context.get("tags").get("jobId")
|
|
46
|
+
|
|
47
|
+
# COMMAND ----------
|
|
48
|
+
|
|
49
|
+
job = get_job(step=step, job_id=job_id)
|
|
50
|
+
|
|
51
|
+
# COMMAND ----------
|
|
52
|
+
|
|
53
|
+
print(job.qualified_name)
|
|
54
|
+
|
|
55
|
+
# COMMAND ----------
|
|
56
|
+
|
|
57
|
+
extra = {
|
|
58
|
+
"partition_key": schedule_id,
|
|
59
|
+
"schedule_id": schedule_id,
|
|
60
|
+
"schedule": schedule,
|
|
61
|
+
"step": step,
|
|
62
|
+
"job": job,
|
|
63
|
+
"notebook_id": notebook_id,
|
|
64
|
+
"target": "buffer",
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
# COMMAND ----------
|
|
68
|
+
|
|
69
|
+
DagsLogger.info("running", extra=extra)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# COMMAND ----------
|
|
73
|
+
|
|
74
|
+
try:
|
|
75
|
+
job.run(schedule_id=schedule_id, schedule=schedule)
|
|
76
|
+
DagsLogger.info("done", extra=extra)
|
|
77
|
+
|
|
78
|
+
except Exception as e:
|
|
79
|
+
DagsLogger.exception("failed", extra=extra)
|
|
80
|
+
raise e
|
|
81
|
+
|
|
82
|
+
finally:
|
|
83
|
+
DagsTableLogger.flush()
|
|
84
|
+
|
|
85
|
+
# COMMAND ----------
|
|
86
|
+
|
|
87
|
+
dbutils.notebook.exit("exit (0)")
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Databricks notebook source
|
|
2
|
+
from databricks.sdk.runtime import dbutils
|
|
3
|
+
from pyspark.errors.exceptions.base import IllegalArgumentException
|
|
4
|
+
|
|
5
|
+
from fabricks.core.scripts import terminate
|
|
6
|
+
|
|
7
|
+
# COMMAND ----------
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
|
|
11
|
+
except (TypeError, IllegalArgumentException, ValueError):
|
|
12
|
+
schedule_id = dbutils.widgets.get("schedule_id")
|
|
13
|
+
assert schedule_id != "---", "no schedule_id provided"
|
|
14
|
+
|
|
15
|
+
assert schedule_id is not None
|
|
16
|
+
|
|
17
|
+
# COMMAND ----------
|
|
18
|
+
|
|
19
|
+
print(schedule_id)
|
|
20
|
+
|
|
21
|
+
# COMMAND ----------
|
|
22
|
+
|
|
23
|
+
terminate(schedule_id=schedule_id)
|
|
24
|
+
|
|
25
|
+
# COMMAND ----------
|
|
26
|
+
|
|
27
|
+
dbutils.notebook.exit("exit (0)")
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Databricks notebook source
|
|
2
|
+
from databricks.sdk.runtime import dbutils
|
|
3
|
+
from pyspark.errors.exceptions.base import IllegalArgumentException
|
|
4
|
+
|
|
5
|
+
from fabricks.core.scripts import vacuum
|
|
6
|
+
|
|
7
|
+
# COMMAND ----------
|
|
8
|
+
|
|
9
|
+
dbutils.widgets.text("schedule_id", "---")
|
|
10
|
+
|
|
11
|
+
# COMMAND ----------
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
|
|
15
|
+
except (TypeError, IllegalArgumentException, ValueError):
|
|
16
|
+
schedule_id = dbutils.widgets.get("schedule_id")
|
|
17
|
+
schedule_id = None if schedule_id == "---" else schedule_id
|
|
18
|
+
|
|
19
|
+
# COMMAND ----------
|
|
20
|
+
|
|
21
|
+
vacuum(schedule_id=schedule_id)
|
|
22
|
+
|
|
23
|
+
# COMMAND ----------
|
|
24
|
+
|
|
25
|
+
dbutils.notebook.exit("exit (0)")
|
fabricks/api/parsers.py
ADDED
fabricks/api/udfs.py
ADDED
fabricks/api/utils.py
ADDED
fabricks/cdc/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from fabricks.cdc.base import BaseCDC, ChangeDataCaptures
|
|
2
|
+
from fabricks.cdc.cdc import CDC
|
|
3
|
+
from fabricks.cdc.nocdc import NoCDC
|
|
4
|
+
from fabricks.cdc.scd1 import SCD1
|
|
5
|
+
from fabricks.cdc.scd2 import SCD2
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"BaseCDC",
|
|
9
|
+
"CDC",
|
|
10
|
+
"ChangeDataCaptures",
|
|
11
|
+
"NoCDC",
|
|
12
|
+
"SCD1",
|
|
13
|
+
"SCD2",
|
|
14
|
+
]
|