fabricks-2024.7.1.5-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (154)
  1. fabricks/__init__.py +0 -0
  2. fabricks/api/__init__.py +7 -0
  3. fabricks/api/cdc/__init__.py +6 -0
  4. fabricks/api/cdc/nocdc.py +3 -0
  5. fabricks/api/cdc/scd1.py +3 -0
  6. fabricks/api/cdc/scd2.py +3 -0
  7. fabricks/api/context.py +31 -0
  8. fabricks/api/core.py +4 -0
  9. fabricks/api/extenders.py +3 -0
  10. fabricks/api/log.py +3 -0
  11. fabricks/api/metastore/__init__.py +10 -0
  12. fabricks/api/metastore/database.py +3 -0
  13. fabricks/api/metastore/table.py +3 -0
  14. fabricks/api/metastore/view.py +6 -0
  15. fabricks/api/notebooks/__init__.py +0 -0
  16. fabricks/api/notebooks/cluster.py +6 -0
  17. fabricks/api/notebooks/deploy/__init__.py +0 -0
  18. fabricks/api/notebooks/deploy/fabricks.py +147 -0
  19. fabricks/api/notebooks/deploy/notebooks.py +86 -0
  20. fabricks/api/notebooks/initialize.py +38 -0
  21. fabricks/api/notebooks/optimize.py +25 -0
  22. fabricks/api/notebooks/process.py +50 -0
  23. fabricks/api/notebooks/run.py +87 -0
  24. fabricks/api/notebooks/terminate.py +27 -0
  25. fabricks/api/notebooks/vacuum.py +25 -0
  26. fabricks/api/parsers.py +3 -0
  27. fabricks/api/udfs.py +3 -0
  28. fabricks/api/utils.py +9 -0
  29. fabricks/cdc/__init__.py +14 -0
  30. fabricks/cdc/base/__init__.py +4 -0
  31. fabricks/cdc/base/cdc.py +5 -0
  32. fabricks/cdc/base/configurator.py +145 -0
  33. fabricks/cdc/base/generator.py +117 -0
  34. fabricks/cdc/base/merger.py +107 -0
  35. fabricks/cdc/base/processor.py +338 -0
  36. fabricks/cdc/base/types.py +3 -0
  37. fabricks/cdc/cdc.py +5 -0
  38. fabricks/cdc/nocdc.py +19 -0
  39. fabricks/cdc/scd.py +21 -0
  40. fabricks/cdc/scd1.py +15 -0
  41. fabricks/cdc/scd2.py +15 -0
  42. fabricks/cdc/templates/__init__.py +0 -0
  43. fabricks/cdc/templates/merge/scd1.sql.jinja +72 -0
  44. fabricks/cdc/templates/merge/scd2.sql.jinja +54 -0
  45. fabricks/cdc/templates/merge.sql.jinja +2 -0
  46. fabricks/cdc/templates/query/__init__.py +0 -0
  47. fabricks/cdc/templates/query/base.sql.jinja +34 -0
  48. fabricks/cdc/templates/query/context.sql.jinja +95 -0
  49. fabricks/cdc/templates/query/current.sql.jinja +32 -0
  50. fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +21 -0
  51. fabricks/cdc/templates/query/deduplicate_key.sql.jinja +14 -0
  52. fabricks/cdc/templates/query/filter.sql.jinja +71 -0
  53. fabricks/cdc/templates/query/final.sql.jinja +1 -0
  54. fabricks/cdc/templates/query/hash.sql.jinja +1 -0
  55. fabricks/cdc/templates/query/nocdc.sql.jinja +10 -0
  56. fabricks/cdc/templates/query/rectify.sql.jinja +120 -0
  57. fabricks/cdc/templates/query/scd1.sql.jinja +112 -0
  58. fabricks/cdc/templates/query/scd2.sql.jinja +114 -0
  59. fabricks/cdc/templates/query.sql.jinja +11 -0
  60. fabricks/context/__init__.py +51 -0
  61. fabricks/context/log.py +26 -0
  62. fabricks/context/runtime.py +143 -0
  63. fabricks/context/spark.py +43 -0
  64. fabricks/context/types.py +123 -0
  65. fabricks/core/__init__.py +4 -0
  66. fabricks/core/dags/__init__.py +9 -0
  67. fabricks/core/dags/base.py +72 -0
  68. fabricks/core/dags/generator.py +154 -0
  69. fabricks/core/dags/log.py +14 -0
  70. fabricks/core/dags/processor.py +163 -0
  71. fabricks/core/dags/terminator.py +26 -0
  72. fabricks/core/deploy/__init__.py +12 -0
  73. fabricks/core/deploy/tables.py +76 -0
  74. fabricks/core/deploy/views.py +417 -0
  75. fabricks/core/extenders.py +29 -0
  76. fabricks/core/jobs/__init__.py +20 -0
  77. fabricks/core/jobs/base/__init__.py +10 -0
  78. fabricks/core/jobs/base/checker.py +89 -0
  79. fabricks/core/jobs/base/configurator.py +323 -0
  80. fabricks/core/jobs/base/error.py +16 -0
  81. fabricks/core/jobs/base/generator.py +391 -0
  82. fabricks/core/jobs/base/invoker.py +119 -0
  83. fabricks/core/jobs/base/job.py +5 -0
  84. fabricks/core/jobs/base/processor.py +204 -0
  85. fabricks/core/jobs/base/types.py +191 -0
  86. fabricks/core/jobs/bronze.py +333 -0
  87. fabricks/core/jobs/get_job.py +126 -0
  88. fabricks/core/jobs/get_job_conf.py +115 -0
  89. fabricks/core/jobs/get_job_id.py +26 -0
  90. fabricks/core/jobs/get_jobs.py +89 -0
  91. fabricks/core/jobs/gold.py +218 -0
  92. fabricks/core/jobs/silver.py +354 -0
  93. fabricks/core/parsers/__init__.py +12 -0
  94. fabricks/core/parsers/base.py +91 -0
  95. fabricks/core/parsers/decorator.py +11 -0
  96. fabricks/core/parsers/get_parser.py +25 -0
  97. fabricks/core/parsers/types.py +6 -0
  98. fabricks/core/schedules.py +89 -0
  99. fabricks/core/scripts/__init__.py +13 -0
  100. fabricks/core/scripts/armageddon.py +82 -0
  101. fabricks/core/scripts/generate.py +20 -0
  102. fabricks/core/scripts/job_schema.py +28 -0
  103. fabricks/core/scripts/optimize.py +45 -0
  104. fabricks/core/scripts/process.py +9 -0
  105. fabricks/core/scripts/stats.py +48 -0
  106. fabricks/core/scripts/steps.py +27 -0
  107. fabricks/core/scripts/terminate.py +6 -0
  108. fabricks/core/scripts/vacuum.py +45 -0
  109. fabricks/core/site_packages.py +55 -0
  110. fabricks/core/steps/__init__.py +4 -0
  111. fabricks/core/steps/base.py +282 -0
  112. fabricks/core/steps/get_step.py +10 -0
  113. fabricks/core/steps/get_step_conf.py +33 -0
  114. fabricks/core/steps/types.py +7 -0
  115. fabricks/core/udfs.py +106 -0
  116. fabricks/core/utils.py +69 -0
  117. fabricks/core/views.py +36 -0
  118. fabricks/metastore/README.md +3 -0
  119. fabricks/metastore/__init__.py +5 -0
  120. fabricks/metastore/database.py +71 -0
  121. fabricks/metastore/pyproject.toml +20 -0
  122. fabricks/metastore/relational.py +61 -0
  123. fabricks/metastore/table.py +529 -0
  124. fabricks/metastore/utils.py +35 -0
  125. fabricks/metastore/view.py +40 -0
  126. fabricks/utils/README.md +3 -0
  127. fabricks/utils/__init__.py +0 -0
  128. fabricks/utils/azure_queue.py +63 -0
  129. fabricks/utils/azure_table.py +99 -0
  130. fabricks/utils/console.py +51 -0
  131. fabricks/utils/container.py +57 -0
  132. fabricks/utils/fdict.py +28 -0
  133. fabricks/utils/helpers.py +89 -0
  134. fabricks/utils/log.py +153 -0
  135. fabricks/utils/path.py +206 -0
  136. fabricks/utils/pip.py +61 -0
  137. fabricks/utils/pydantic.py +92 -0
  138. fabricks/utils/pyproject.toml +18 -0
  139. fabricks/utils/read/__init__.py +11 -0
  140. fabricks/utils/read/read.py +305 -0
  141. fabricks/utils/read/read_excel.py +5 -0
  142. fabricks/utils/read/read_yaml.py +43 -0
  143. fabricks/utils/read/types.py +3 -0
  144. fabricks/utils/schema/__init__.py +7 -0
  145. fabricks/utils/schema/get_json_schema_for_type.py +161 -0
  146. fabricks/utils/schema/get_schema_for_type.py +93 -0
  147. fabricks/utils/secret.py +78 -0
  148. fabricks/utils/sqlglot.py +48 -0
  149. fabricks/utils/write/__init__.py +8 -0
  150. fabricks/utils/write/delta.py +46 -0
  151. fabricks/utils/write/stream.py +27 -0
  152. fabricks-2024.7.1.5.dist-info/METADATA +212 -0
  153. fabricks-2024.7.1.5.dist-info/RECORD +154 -0
  154. fabricks-2024.7.1.5.dist-info/WHEEL +4 -0
fabricks/__init__.py ADDED
File without changes
fabricks/api/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from fabricks.api.core import get_job, get_jobs, get_step
+
+ __all__ = [
+     "get_job",
+     "get_jobs",
+     "get_step",
+ ]
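The three names above are the package's top-level entry points. A minimal usage sketch, assuming a configured Fabricks runtime; the step name and job_id are hypothetical placeholders, but the get_job(step=..., job_id=...) call and the qualified_name attribute both appear in fabricks/api/notebooks/run.py later in this diff:

    from fabricks.api import get_job

    job = get_job(step="bronze", job_id="...")  # hypothetical step/job_id
    print(job.qualified_name)                   # attribute used in run.py below
    job.run()  # run.py passes schedule_id/schedule here; a bare call is an assumption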
fabricks/api/cdc/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from fabricks.api.cdc.nocdc import NoCDC
+ from fabricks.api.cdc.scd1 import SCD1
+ from fabricks.api.cdc.scd2 import SCD2
+ from fabricks.cdc.cdc import CDC
+
+ __all__ = ["CDC", "SCD1", "SCD2", "NoCDC"]
fabricks/api/cdc/nocdc.py ADDED
@@ -0,0 +1,3 @@
+ from fabricks.cdc.nocdc import NoCDC
+
+ __all__ = ["NoCDC"]
fabricks/api/cdc/scd1.py ADDED
@@ -0,0 +1,3 @@
+ from fabricks.cdc.scd1 import SCD1
+
+ __all__ = ["SCD1"]
fabricks/api/cdc/scd2.py ADDED
@@ -0,0 +1,3 @@
+ from fabricks.cdc.scd2 import SCD2
+
+ __all__ = ["SCD2"]
fabricks/api/context.py ADDED
@@ -0,0 +1,31 @@
+ from databricks.sdk.runtime import dbutils, spark
+
+ from fabricks.context import BRONZE, GOLD, SECRET_SCOPE, SILVER
+ from fabricks.core.jobs.base.types import Bronzes, Golds, Silvers
+
+ # spark
+ SPARK = spark
+ DBUTILS = dbutils
+
+ # step
+ BRONZES = Bronzes
+ SILVERS = Silvers
+ GOLDS = Golds
+ STEPS = BRONZES + SILVERS + GOLDS
+
+
+ __all__ = [
+     "BRONZE",
+     "Bronzes",
+     "BRONZES",
+     "DBUTILS",
+     "GOLD",
+     "Golds",
+     "GOLDS",
+     "SECRET_SCOPE",
+     "SILVER",
+     "Silvers",
+     "SILVERS",
+     "SPARK",
+     "STEPS",
+ ]
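Since STEPS is built by concatenating BRONZES, SILVERS and GOLDS, the step collections evidently behave as sequences. A short sketch under that assumption:

    from fabricks.api.context import BRONZES, GOLDS, SILVERS, STEPS

    # STEPS is the concatenation of the three step collections
    for step in STEPS:
        print(step)

    assert len(STEPS) == len(BRONZES) + len(SILVERS) + len(GOLDS)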
fabricks/api/core.py ADDED
@@ -0,0 +1,4 @@
+ from fabricks.core.jobs import get_job, get_jobs
+ from fabricks.core.steps import get_step
+
+ __all__ = ["get_job", "get_jobs", "get_step"]
fabricks/api/extenders.py ADDED
@@ -0,0 +1,3 @@
+ from fabricks.core.extenders import extender
+
+ __all__ = ["extender"]
fabricks/api/log.py ADDED
@@ -0,0 +1,3 @@
+ from fabricks.context.log import Logger, TableLogger, flush
+
+ __all__ = ["Logger", "TableLogger", "flush"]
fabricks/api/metastore/__init__.py ADDED
@@ -0,0 +1,10 @@
+ from fabricks.api.metastore.database import Database
+ from fabricks.api.metastore.table import Table
+ from fabricks.api.metastore.view import View, create_or_replace_view
+
+ __all__ = [
+     "create_or_replace_view",
+     "Database",
+     "Table",
+     "View",
+ ]
fabricks/api/metastore/database.py ADDED
@@ -0,0 +1,3 @@
+ from fabricks.metastore import Database
+
+ __all__ = ["Database"]
fabricks/api/metastore/table.py ADDED
@@ -0,0 +1,3 @@
+ from fabricks.metastore import Table
+
+ __all__ = ["Table"]
fabricks/api/metastore/view.py ADDED
@@ -0,0 +1,6 @@
+ from fabricks.metastore import View
+
+ create_or_replace_view = View.create_or_replace
+
+
+ __all__ = ["View", "create_or_replace_view"]
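create_or_replace_view is simply an alias for View.create_or_replace, whose signature is not visible in this diff. A hedged sketch, with purely hypothetical arguments:

    from fabricks.api.metastore import create_or_replace_view

    # hypothetical arguments; check View.create_or_replace for the real signature
    create_or_replace_view("gold.my_view", "SELECT * FROM gold.my_table")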
fabricks/api/notebooks/__init__.py ADDED
File without changes
fabricks/api/notebooks/cluster.py ADDED
@@ -0,0 +1,6 @@
+ # Databricks notebook source
+ from databricks.sdk.runtime import dbutils
+
+ # COMMAND ----------
+
+ dbutils.notebook.exit("exit (0)")
fabricks/api/notebooks/deploy/__init__.py ADDED
File without changes
fabricks/api/notebooks/deploy/fabricks.py ADDED
@@ -0,0 +1,147 @@
+ # Databricks notebook source
+ # MAGIC %pip install python-dotenv
+
+ # COMMAND ----------
+
+ import os
+ import subprocess
+ from pathlib import Path
+
+ from databricks.sdk.runtime import dbutils
+ from dotenv import load_dotenv
+
+ # COMMAND ----------
+
+ load_dotenv()
+
+ # COMMAND ----------
+
+ try:
+     dbutils.fs.ls("mnt/fabricks")
+ except Exception:
+     print("fabricks container not mounted")
+
+ # COMMAND ----------
+
+ try:
+     dbutils.fs.ls("mnt/fabricks/versions")
+ except Exception:
+     print("fabricks not found")
+
+ # COMMAND ----------
+
+ runtime = os.environ.get("path_runtime")
+ assert runtime
+ if not Path(runtime).exists():
+     print("runtime not found")
+
+ # COMMAND ----------
+
+ notebooks = os.environ.get("path_notebooks")
+ assert notebooks
+ if not Path(notebooks).exists():
+     print("notebooks not found")
+
+ # COMMAND ----------
+
+ scripts = os.environ.get("path_scripts")
+ assert scripts
+ if not Path(scripts).exists():
+     print("scripts not found")
+
+ # COMMAND ----------
+
+ abfss_wheels = "abfss://fabricks-wheels@bmsstaprdeuwsoftware.dfs.core.windows.net"
+
+ # COMMAND ----------
+
+ version = os.environ.get("fabricks_version")
+
+ # COMMAND ----------
+
+ mnt_version = f"dbfs:/mnt/fabricks/versions/{version}"
+ fuse_mnt_version = f"/dbfs/mnt/fabricks/versions/{version}"
+
+ # COMMAND ----------
+
+ try:
+     for f in dbutils.fs.ls(mnt_version):
+         dbutils.fs.rm(f.path, True)
+ except Exception:
+     pass
+
+ dbutils.fs.rm(mnt_version, True)
+ dbutils.fs.mkdirs(mnt_version)
+
+ # COMMAND ----------
+
+ dbutils.fs.rm(mnt_version, True)
+ dbutils.fs.mkdirs(mnt_version)
+
+ # COMMAND ----------
+
+ print("copying version to", f"{mnt_version}/version")
+
+ for f in dbutils.fs.ls(f"{abfss_wheels}/{version}"):
+     to = f"{mnt_version}/{f.name}"
+
+     try:
+         dbutils.fs.ls(to)
+     except Exception:
+         print("uploading", f.name)
+         dbutils.fs.cp(f.path, to, recurse=True)
+
+ # COMMAND ----------
+
+ print("pip install requirements.txt")
+
+ out = subprocess.run(
+     [
+         "pip",
+         "install",
+         "--no-index",
+         f"--find-links={fuse_mnt_version}/wheels",
+         "-r",
+         f"{fuse_mnt_version}/requirements.txt",
+     ],
+     capture_output=True,
+ )
+
+ if out.returncode == 1:
+     raise ValueError(out.stderr)
+
+ # COMMAND ----------
+
+ latest = os.environ.get("latest")
+ assert latest
+ latest = latest.lower() == "true"
+
+ # COMMAND ----------
+
+ versions = [version, "2023.12.*"] if latest else [version]
+
+ # COMMAND ----------
+
+ print("deploy init script")
+
+ for v in versions:
+     path = f"{scripts}/{v}.sh"
+
+     with open(path, "w") as sh:
+         sh.write(
+             f"""
+     sudo echo FABRICKS_RUNTIME={runtime} >> /etc/environment
+     sudo echo FABRICKS_NOTEBOOKS={notebooks}/{version} >> /etc/environment
+     sudo echo FABRICKS_VERSION={version} >> /etc/environment
+
+     /databricks/python/bin/pip install --no-index --find-links='{fuse_mnt_version}/wheels' -r '{fuse_mnt_version}/requirements.txt'
+     /databricks/python/bin/pip install sqlglot
+     /databricks/python/bin/pip install jinja2
+     """.replace("    ", "").strip()
+         )
+
+ # COMMAND ----------
+
+ dbutils.notebook.exit("exit (0)")
+
+ # COMMAND ----------
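The init script generated above writes FABRICKS_RUNTIME, FABRICKS_NOTEBOOKS and FABRICKS_VERSION to /etc/environment and pre-installs the pinned wheels, so code running on the cluster can locate the runtime without further configuration. A sketch of the consuming side (the variable names come from the script above; reading them via os.environ is an assumption about how the runtime picks them up):

    import os

    # set on the cluster by the generated init script via /etc/environment
    runtime = os.environ.get("FABRICKS_RUNTIME")
    notebooks = os.environ.get("FABRICKS_NOTEBOOKS")
    version = os.environ.get("FABRICKS_VERSION")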
fabricks/api/notebooks/deploy/notebooks.py ADDED
@@ -0,0 +1,86 @@
+ # Databricks notebook source
+ # MAGIC %pip install python-dotenv
+
+ # COMMAND ----------
+
+ # MAGIC %pip install databricks_cli
+
+ # COMMAND ----------
+
+ import os
+ from importlib import resources
+ from pathlib import Path
+
+ from databricks.sdk.runtime import dbutils
+ from databricks_cli.sdk.api_client import ApiClient  # type: ignore
+ from databricks_cli.workspace.api import WorkspaceApi  # type: ignore
+ from dotenv import load_dotenv
+
+ from fabricks.api import notebooks as src
+
+ # COMMAND ----------
+
+ load_dotenv()
+
+ # COMMAND ----------
+
+ host = os.environ.get("databricks_host")
+ token = os.environ.get("databricks_token")
+ version = os.environ.get("fabricks_version")
+ root = os.environ.get("path_notebooks")
+ latest = os.environ.get("latest")
+ assert latest
+ latest = latest.lower() == "true"
+ runtime = os.environ.get("path_runtime")
+
+ # COMMAND ----------
+
+ notebooks = ["cluster", "initialize", "job", "log", "optimize", "run", "terminate"]
+
+ # COMMAND ----------
+
+ client = ApiClient(host=host, token=token)
+ workspace_api = WorkspaceApi(client)
+
+ # COMMAND ----------
+
+ versions = [version, "latest"] if latest else [version]
+
+ # COMMAND ----------
+
+ for v in versions:
+     print(f"deploy {v}")
+
+     p = f"{root}/{v}"
+     if not Path(p).exists():
+         os.mkdir(p)
+
+     for n in notebooks:
+         notebook = resources.files(src) / f"{n}.py"
+         workspace_api.import_workspace(
+             source_path=notebook,
+             target_path=f"{p.replace('/Workspace', '')}/{n}.py",
+             fmt="AUTO",
+             language="PYTHON",
+             is_overwrite=True,
+         )
+
+ # COMMAND ----------
+
+ if latest:
+     p = f"{runtime}/notebooks"
+     for n in notebooks:
+         notebook = resources.files(src) / f"{n}.py"
+         workspace_api.import_workspace(
+             source_path=notebook,
+             target_path=f"{p.replace('/Workspace', '')}/{n}.py",
+             fmt="AUTO",
+             language="PYTHON",
+             is_overwrite=True,
+         )
+
+ # COMMAND ----------
+
+ dbutils.notebook.exit("exit (0)")
+
+ # COMMAND ----------
fabricks/api/notebooks/initialize.py ADDED
@@ -0,0 +1,38 @@
+ # Databricks notebook source
+ from databricks.sdk.runtime import dbutils, display
+
+ from fabricks.core.scripts import generate
+
+ # COMMAND ----------
+
+ dbutils.widgets.text("schedule", "---")
+
+ # COMMAND ----------
+
+ schedule = dbutils.widgets.get("schedule")
+ assert schedule != "---", "no schedule provided"
+
+ # COMMAND ----------
+
+ print(schedule)
+
+ # COMMAND ----------
+
+ schedule_id, job_df, dependency_df = generate(schedule=schedule)
+
+ # COMMAND ----------
+
+ display(job_df)
+
+ # COMMAND ----------
+
+ display(dependency_df)
+
+ # COMMAND ----------
+
+ dbutils.jobs.taskValues.set(key="schedule_id", value=schedule_id)
+ dbutils.jobs.taskValues.set(key="schedule", value=schedule)
+
+ # COMMAND ----------
+
+ dbutils.notebook.exit("exit (0)")
fabricks/api/notebooks/optimize.py ADDED
@@ -0,0 +1,25 @@
+ # Databricks notebook source
+ from databricks.sdk.runtime import dbutils
+ from pyspark.errors.exceptions.base import IllegalArgumentException
+
+ from fabricks.core.scripts import optimize
+
+ # COMMAND ----------
+
+ dbutils.widgets.text("schedule_id", "---")
+
+ # COMMAND ----------
+
+ try:
+     schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
+ except (TypeError, IllegalArgumentException, ValueError):
+     schedule_id = dbutils.widgets.get("schedule_id")
+     schedule_id = None if schedule_id == "---" else schedule_id
+
+ # COMMAND ----------
+
+ optimize(schedule_id=schedule_id)
+
+ # COMMAND ----------
+
+ dbutils.notebook.exit("exit (0)")
fabricks/api/notebooks/process.py ADDED
@@ -0,0 +1,50 @@
+ # Databricks notebook source
+ from databricks.sdk.runtime import dbutils
+ from pyspark.errors.exceptions.base import IllegalArgumentException
+
+ from fabricks.core.scripts import process
+
+ # COMMAND ----------
+
+ dbutils.widgets.text("step", "---")
+ dbutils.widgets.text("schedule_id", "---")
+
+ # COMMAND ----------
+
+ try:
+     schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
+ except (TypeError, IllegalArgumentException, ValueError):
+     schedule_id = dbutils.widgets.get("schedule_id")
+     assert schedule_id != "---", "no schedule_id provided"
+
+ assert schedule_id is not None
+
+ # COMMAND ----------
+
+ step = dbutils.widgets.get("step")
+ assert step != "---", "no step provided"
+
+ # COMMAND ----------
+
+ schedule = dbutils.widgets.get("schedule")
+ assert schedule != "---", "no schedule provided"
+
+ # COMMAND ----------
+
+ print(schedule_id)
+
+ # COMMAND ----------
+
+ print(step)
+
+ # COMMAND ----------
+
+ print(schedule)
+
+ # COMMAND ----------
+
+ process(schedule_id=schedule_id, schedule=schedule, step=step)
+
+ # COMMAND ----------
+
+ dbutils.notebook.exit("exit (0)")
fabricks/api/notebooks/run.py ADDED
@@ -0,0 +1,87 @@
+ # Databricks notebook source
+ import json
+
+ from databricks.sdk.runtime import dbutils
+
+ from fabricks.core.dags.log import DagsLogger, DagsTableLogger
+ from fabricks.core.jobs import get_job
+
+ # COMMAND ----------
+
+ dbutils.widgets.text("step", "---")
+ dbutils.widgets.text("job_id", "---")
+ dbutils.widgets.text("job", "--")
+ dbutils.widgets.text("schedule_id", "---")
+ dbutils.widgets.text("schedule", "---")
+
+ # COMMAND ----------
+
+ step = dbutils.widgets.get("step")
+ assert step != "---"
+
+ # COMMAND ----------
+
+ job_id = dbutils.widgets.get("job_id")
+ assert job_id != "---"
+
+ # COMMAND ----------
+
+ job = dbutils.widgets.get("job")
+ assert job != "---"
+
+ # COMMAND ----------
+
+ schedule_id = dbutils.widgets.get("schedule_id")
+ assert schedule_id != "---"
+
+ # COMMAND ----------
+
+ schedule = dbutils.widgets.get("schedule")
+ assert schedule != "---"
+
+ # COMMAND ----------
+
+ context = json.loads(dbutils.notebook.entry_point.getDbutils().notebook().getContext().toJson())  # type: ignore
+ notebook_id = context.get("tags").get("jobId")
+
+ # COMMAND ----------
+
+ job = get_job(step=step, job_id=job_id)
+
+ # COMMAND ----------
+
+ print(job.qualified_name)
+
+ # COMMAND ----------
+
+ extra = {
+     "partition_key": schedule_id,
+     "schedule_id": schedule_id,
+     "schedule": schedule,
+     "step": step,
+     "job": job,
+     "notebook_id": notebook_id,
+     "target": "buffer",
+ }
+
+ # COMMAND ----------
+
+ DagsLogger.info("running", extra=extra)
+
+
+ # COMMAND ----------
+
+ try:
+     job.run(schedule_id=schedule_id, schedule=schedule)
+     DagsLogger.info("done", extra=extra)
+
+ except Exception as e:
+     DagsLogger.exception("failed", extra=extra)
+     raise e
+
+ finally:
+     DagsTableLogger.flush()
+
+ # COMMAND ----------
+
+ dbutils.notebook.exit("exit (0)")
fabricks/api/notebooks/terminate.py ADDED
@@ -0,0 +1,27 @@
+ # Databricks notebook source
+ from databricks.sdk.runtime import dbutils
+ from pyspark.errors.exceptions.base import IllegalArgumentException
+
+ from fabricks.core.scripts import terminate
+
+ # COMMAND ----------
+
+ try:
+     schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
+ except (TypeError, IllegalArgumentException, ValueError):
+     schedule_id = dbutils.widgets.get("schedule_id")
+     assert schedule_id != "---", "no schedule_id provided"
+
+ assert schedule_id is not None
+
+ # COMMAND ----------
+
+ print(schedule_id)
+
+ # COMMAND ----------
+
+ terminate(schedule_id=schedule_id)
+
+ # COMMAND ----------
+
+ dbutils.notebook.exit("exit (0)")
fabricks/api/notebooks/vacuum.py ADDED
@@ -0,0 +1,25 @@
+ # Databricks notebook source
+ from databricks.sdk.runtime import dbutils
+ from pyspark.errors.exceptions.base import IllegalArgumentException
+
+ from fabricks.core.scripts import vacuum
+
+ # COMMAND ----------
+
+ dbutils.widgets.text("schedule_id", "---")
+
+ # COMMAND ----------
+
+ try:
+     schedule_id = dbutils.jobs.taskValues.get(taskKey="initialize", key="schedule_id")
+ except (TypeError, IllegalArgumentException, ValueError):
+     schedule_id = dbutils.widgets.get("schedule_id")
+     schedule_id = None if schedule_id == "---" else schedule_id
+
+ # COMMAND ----------
+
+ vacuum(schedule_id=schedule_id)
+
+ # COMMAND ----------
+
+ dbutils.notebook.exit("exit (0)")
fabricks/api/parsers.py ADDED
@@ -0,0 +1,3 @@
+ from fabricks.core.parsers import BaseParser, ParserOptions, parser
+
+ __all__ = ["BaseParser", "ParserOptions", "parser"]
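BaseParser, ParserOptions and parser are re-exported from fabricks.core.parsers, where parser lives in a decorator module (see fabricks/core/parsers/decorator.py in the file list). A hedged sketch of registering a custom parser under that assumption; the decorator argument and the parse method are hypothetical:

    from fabricks.api.parsers import BaseParser, parser

    @parser("my_parser")  # hypothetical registration name
    class MyParser(BaseParser):
        def parse(self, df):  # hypothetical hook; see fabricks/core/parsers/base.py
            return df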
fabricks/api/udfs.py ADDED
@@ -0,0 +1,3 @@
+ from fabricks.core.udfs import register_all_udfs, register_udf, udf
+
+ __all__ = ["udf", "register_all_udfs", "register_udf"]
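udf, register_udf and register_all_udfs come from fabricks.core.udfs; their signatures are not shown in this diff. A hedged sketch, assuming udf is a registration decorator (the usage and body below are hypothetical):

    from fabricks.api.udfs import udf

    @udf  # hypothetical usage; see fabricks/core/udfs.py for the real signature
    def clean(value: str) -> str:
        return value.strip().lower()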
fabricks/api/utils.py ADDED
@@ -0,0 +1,9 @@
+ from fabricks.utils.helpers import concat_dfs, concat_ws, run_in_parallel
+ from fabricks.utils.path import Path
+
+ __all__ = [
+     "concat_dfs",
+     "concat_ws",
+     "Path",
+     "run_in_parallel",
+ ]
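The names run_in_parallel and concat_dfs suggest a helper that fans work out over a collection and one that unions the resulting DataFrames. A hedged sketch under that assumption (both signatures are guesses):

    from databricks.sdk.runtime import spark

    from fabricks.api.utils import concat_dfs, run_in_parallel

    def load(name):  # hypothetical worker function
        return spark.read.table(name)

    dfs = run_in_parallel(load, ["bronze.a", "bronze.b"])  # assumed signature
    df = concat_dfs(dfs)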
fabricks/cdc/__init__.py ADDED
@@ -0,0 +1,14 @@
+ from fabricks.cdc.base import BaseCDC, ChangeDataCaptures
+ from fabricks.cdc.cdc import CDC
+ from fabricks.cdc.nocdc import NoCDC
+ from fabricks.cdc.scd1 import SCD1
+ from fabricks.cdc.scd2 import SCD2
+
+ __all__ = [
+     "BaseCDC",
+     "CDC",
+     "ChangeDataCaptures",
+     "NoCDC",
+     "SCD1",
+     "SCD2",
+ ]
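The cdc package exposes one class per change-data-capture mode; following the usual slowly-changing-dimension convention, SCD1 would keep only the latest version of each key while SCD2 keeps history, and the merge templates listed above (merge/scd1.sql.jinja, merge/scd2.sql.jinja) are presumably what these classes render. How a CDC object is constructed is not visible in this diff, so the sketch below is purely illustrative:

    from fabricks.cdc import SCD2

    scd2 = SCD2("silver", "my_table")  # hypothetical constructor arguments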
fabricks/cdc/base/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from fabricks.cdc.base.cdc import BaseCDC
+ from fabricks.cdc.base.types import ChangeDataCaptures
+
+ __all__ = ["BaseCDC", "ChangeDataCaptures"]
fabricks/cdc/base/cdc.py ADDED
@@ -0,0 +1,5 @@
+ from fabricks.cdc.base.merger import Merger
+
+
+ class BaseCDC(Merger):
+     pass