databricks_bundle_decorators-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,55 @@
+ """databricks-bundle-decorators – decorator-based Databricks job/task framework.
+
+ Public API
+ ----------
+ Decorators:
+     ``@task``, ``@job``
+
+ Cluster configuration:
+     ``job_cluster()``
+
+ Data management:
+     ``IoManager``, ``OutputContext``, ``InputContext``
+
+ Task values (small scalars):
+     ``set_task_value``, ``get_task_value``
+
+ Job parameters:
+     ``params``
+ """
+
+ from databricks_bundle_decorators.context import params as params
+ from databricks_bundle_decorators.decorators import job as job
+ from databricks_bundle_decorators.decorators import job_cluster as job_cluster
+ from databricks_bundle_decorators.decorators import task as task
+ from databricks_bundle_decorators.discovery import (
+     discover_pipelines as discover_pipelines,
+ )
+ from databricks_bundle_decorators.io_manager import InputContext as InputContext
+ from databricks_bundle_decorators.io_manager import IoManager as IoManager
+ from databricks_bundle_decorators.io_manager import OutputContext as OutputContext
+ from databricks_bundle_decorators.registry import (
+     DuplicateResourceError as DuplicateResourceError,
+ )
+ from databricks_bundle_decorators.sdk_types import ClusterConfig as ClusterConfig
+ from databricks_bundle_decorators.sdk_types import JobConfig as JobConfig
+ from databricks_bundle_decorators.sdk_types import TaskConfig as TaskConfig
+ from databricks_bundle_decorators.task_values import get_task_value as get_task_value
+ from databricks_bundle_decorators.task_values import set_task_value as set_task_value
+
+ __all__ = [
+     "task",
+     "job",
+     "job_cluster",
+     "discover_pipelines",
+     "IoManager",
+     "OutputContext",
+     "InputContext",
+     "DuplicateResourceError",
+     "ClusterConfig",
+     "JobConfig",
+     "TaskConfig",
+     "set_task_value",
+     "get_task_value",
+     "params",
+ ]
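Taken together, the exported names are intended to be used along these lines. This is an illustrative sketch rather than code from the package: the cluster values are placeholders, and it assumes, as the ``dag_edges`` / ``__upstream__`` handling in the code-generation module later in this diff suggests, that passing one task's result into another inside a ``@job`` body is what records the dependency between the two tasks.

    from databricks_bundle_decorators import job, job_cluster, task, params

    small = job_cluster(
        name="small",                      # placeholder cluster settings
        spark_version="14.3.x-scala2.12",
        node_type_id="Standard_DS3_v2",
        num_workers=1,
    )

    @task
    def extract():
        return {"source": params["url"]}   # small payload handed downstream

    @task
    def load(payload):
        print(payload)

    @job(params={"url": "https://example.com"}, cluster="small")
    def my_job():
        load(extract())   # assumed to register extract as an upstream dependency of load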
@@ -0,0 +1,253 @@
+ """CLI for databricks-bundle-decorators.
+
+ Provides scaffolding commands for pipeline repositories.
+
+ Usage::
+
+     uv run dbxdec init
+ """
+
+ import argparse
+ import sys
+ import textwrap
+ from pathlib import Path
+
+ try:
+     import tomllib
+ except ModuleNotFoundError: # Python < 3.11
+     import tomli as tomllib # type: ignore[no-redef]
+
+
+ def _read_pyproject(cwd: Path) -> dict:
+     """Read and parse pyproject.toml from *cwd*."""
+     path = cwd / "pyproject.toml"
+     if not path.exists():
+         print(
+             "Error: No pyproject.toml found in the current directory.", file=sys.stderr
+         )
+         print(
+             "Run this command from the root of your pipeline project.", file=sys.stderr
+         )
+         sys.exit(1)
+     return tomllib.loads(path.read_text())
+
+
+ def _detect_package_name(pyproject: dict) -> str:
+     """Derive the Python import name from the project name in pyproject.toml."""
+     name = pyproject.get("project", {}).get("name")
+     if not name:
+         print("Error: No [project].name found in pyproject.toml.", file=sys.stderr)
+         sys.exit(1)
+     return name.replace("-", "_")
+
+
+ def _detect_src_layout(cwd: Path, package_name: str) -> Path:
+     """Return the package directory, detecting flat or src layout."""
+     src_path = cwd / "src" / package_name
+     flat_path = cwd / package_name
+     if src_path.exists():
+         return src_path
+     if flat_path.exists():
+         return flat_path
+     # Default to src layout (will be created)
+     return src_path
+
+
+ # --- File templates -------------------------------------------------------
+
+ _RESOURCES_INIT = '''\
+ """Resource loader for ``databricks bundle deploy``.
+
+ Referenced from ``python.resources`` in ``databricks.yaml``::
+
+     python:
+       venv_path: .venv
+       resources:
+         - 'resources:load_resources'
+ """
+
+ from databricks.bundles.core import Bundle, Resources
+
+
+ def load_resources(bundle: Bundle) -> Resources:
+     """Entry-point called by ``databricks bundle deploy``."""
+     from databricks_bundle_decorators.discovery import discover_pipelines
+     from databricks_bundle_decorators.codegen import generate_resources
+
+     discover_pipelines()
+
+     resources = Resources()
+     for key, job_resource in generate_resources().items():
+         resources.add_resource(key, job_resource)
+     return resources
+ '''
+
+ _PIPELINES_INIT = '''\
+ """Pipeline auto-discovery.
+
+ Every .py module in this package is imported automatically, triggering
+ @task / @job / @job_cluster decorator registration.
+ """
+
+
+
+ import importlib
+ import pkgutil
+
+ for _loader, _module_name, _is_pkg in pkgutil.walk_packages(__path__):
+     importlib.import_module(f"{__name__}.{_module_name}")
+ '''
+
+ _DATABRICKS_YAML = """\
+ bundle:
+   name: {project_name}
+
+ artifacts:
+   {package_name}:
+     type: whl
+     build: uv build --wheel
+     path: .
+
+ python:
+   venv_path: .venv
+   resources:
+     - 'resources:load_resources'
+
+ targets:
+   dev:
+     mode: development
+     workspace:
+       host: https://<your-workspace>.azuredatabricks.net/
+ """
+
+ _EXAMPLE_PIPELINE = '''\
+ """Example pipeline – replace with your own tasks and jobs."""
+
+ from databricks_bundle_decorators import job, job_cluster, task, params
+
+
+ default_cluster = job_cluster(
+     name="default_cluster",
+     spark_version="14.3.x-scala2.12",
+     node_type_id="Standard_DS3_v2",
+     num_workers=2,
+ )
+
+
+ @task
+ def hello():
+     print(f"Hello from databricks-bundle-decorators! url={params.get('url', 'N/A')}")
+
+
+ @job(
+     params={"url": "https://example.com"},
+     cluster="default_cluster",
+ )
+ def example_job():
+     hello()
+ '''
+
+
+ # --- Init command ----------------------------------------------------------
+
+
+ def _cmd_init(args: argparse.Namespace) -> None:
+     """Scaffold a new databricks-bundle-decorators pipeline project."""
+     cwd = Path.cwd()
+     pyproject = _read_pyproject(cwd)
+     package_name = _detect_package_name(pyproject)
+     project_name = pyproject["project"]["name"]
+     pkg_dir = _detect_src_layout(cwd, package_name)
+
+     created: list[str] = []
+     skipped: list[str] = []
+
+     def _write(path: Path, content: str) -> None:
+         if path.exists():
+             skipped.append(str(path.relative_to(cwd)))
+             return
+         path.parent.mkdir(parents=True, exist_ok=True)
+         path.write_text(content)
+         created.append(str(path.relative_to(cwd)))
+
+     # 1. resources/__init__.py
+     _write(cwd / "resources" / "__init__.py", _RESOURCES_INIT)
+
+     # 2. pipelines/__init__.py (auto-discovery)
+     _write(pkg_dir / "pipelines" / "__init__.py", _PIPELINES_INIT)
+
+     # 3. Example pipeline
+     _write(
+         pkg_dir / "pipelines" / "example.py",
+         _EXAMPLE_PIPELINE,
+     )
+
+     # 4. databricks.yaml
+     _write(
+         cwd / "databricks.yaml",
+         _DATABRICKS_YAML.format(
+             project_name=project_name,
+             package_name=package_name,
+         ),
+     )
+
+     # 5. Ensure package __init__.py exists
+     _write(pkg_dir / "__init__.py", "")
+
+     # --- Summary -----------------------------------------------------------
+     print()
+     print("databricks-bundle-decorators project initialized!")
+     print()
+
+     if created:
+         print("Created:")
+         for f in created:
+             print(f" {f}")
+
+     if skipped:
+         print("Skipped (already exist):")
+         for f in skipped:
+             print(f" {f}")
+
+     # --- Check for entry point in pyproject.toml ---------------------------
+     entry_points = (
+         pyproject.get("project", {})
+         .get("entry-points", {})
+         .get("databricks_bundle_decorators.pipelines", {})
+     )
+     if not entry_points:
+         print()
+         print("Next step: add the pipeline entry point to your pyproject.toml:")
+         print()
+         print(
+             textwrap.dedent(f"""\
+                 [project.entry-points."databricks_bundle_decorators.pipelines"]
+                 {package_name} = "{package_name}.pipelines"
+             """)
+         )
+
+     print("Done! Define your @task and @job functions in the pipelines/ directory.")
+
+
+ # --- Main ------------------------------------------------------------------
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(
+         prog="dbxdec",
+         description="databricks-bundle-decorators CLI",
+     )
+     subparsers = parser.add_subparsers(dest="command")
+
+     subparsers.add_parser(
+         "init",
+         help="Scaffold a new databricks-bundle-decorators pipeline project",
+     )
+
+     args = parser.parse_args()
+
+     if args.command == "init":
+         _cmd_init(args)
+     else:
+         parser.print_help()
+         sys.exit(1)
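For reference, running ``dbxdec init`` inside an example project named ``my-pipelines`` (a hypothetical name) that has no existing package directory would create the following files, with the src layout chosen by default:

    databricks.yaml
    resources/__init__.py
    src/my_pipelines/__init__.py
    src/my_pipelines/pipelines/__init__.py
    src/my_pipelines/pipelines/example.py

and, if the entry-point table is missing from pyproject.toml, print the ``[project.entry-points."databricks_bundle_decorators.pipelines"]`` snippet to add.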
@@ -0,0 +1,107 @@
+ """Convert registries into ``databricks.bundles.jobs`` resource objects.
+
+ Called at deploy time by the resource loader. Reads the global
+ registries populated by the ``@task``, ``@job_cluster``, and ``@job`` decorators
+ and produces ``Job`` dataclass instances that the Databricks CLI
+ serialises into the bundle configuration.
+ """
+
+ from databricks_bundle_decorators.registry import (
+     _CLUSTER_REGISTRY,
+     _JOB_REGISTRY,
+     _TASK_REGISTRY,
+ )
+
+
+ def generate_resources(package_name: str = "databricks_bundle_decorators") -> dict:
+     """Build ``{resource_key: Job}`` from the global registries.
+
+     Parameters
+     ----------
+     package_name:
+         The Python package name used in ``PythonWheelTask``. Must match
+         the ``[project].name`` in *pyproject.toml*.
+     """
+     from databricks.bundles.jobs import (
+         ClusterSpec,
+         Job,
+         JobCluster,
+         JobParameterDefinition,
+         Library,
+         PythonWheelTask,
+         Task,
+         TaskDependency,
+     )
+
+     jobs: dict[str, Job] = {}
+
+     for job_name, job_meta in _JOB_REGISTRY.items():
+         tasks: list[Task] = []
+
+         for task_key, upstream_keys in job_meta.dag.items():
+             depends_on = [TaskDependency(task_key=uk) for uk in upstream_keys]
+
+             # ----- named_parameters sent to the wheel entry-point ----------
+             named_params: dict[str, str] = {
+                 "__job_name__": job_name,
+                 "__task_key__": task_key,
+                 "__run_id__": "{{job.run_id}}",
+             }
+
+             # Upstream edge info so the runtime can invoke IoManager.load()
+             edges = job_meta.dag_edges.get(task_key, {})
+             for param_name, upstream_task in edges.items():
+                 named_params[f"__upstream__{param_name}"] = upstream_task
+
+             # Forward every job-level parameter to the task CLI
+             for param_name in job_meta.params:
+                 named_params[param_name] = (
+                     "{{" + f'job.parameters["{param_name}"]' + "}}"
+                 )
+
+             # ----- per-task SDK config (max_retries, timeout, etc.) -----
+             qualified_key = f"{job_name}.{task_key}"
+             task_meta = _TASK_REGISTRY.get(qualified_key)
+             task_sdk_config = task_meta.sdk_config if task_meta else {}
+
+             task_obj = Task(
+                 task_key=task_key,
+                 depends_on=depends_on,
+                 job_cluster_key=job_meta.cluster,
+                 python_wheel_task=PythonWheelTask(
+                     package_name=package_name,
+                     entry_point="dbxdec-run",
+                     named_parameters=named_params, # type: ignore[arg-type] # SDK Variable wrappers
+                 ),
+                 libraries=[Library(whl="dist/*.whl")],
+                 **task_sdk_config,
+             )
+             tasks.append(task_obj)
+
+         # ----- job clusters -----------------------------------------------
+         job_clusters: list[JobCluster] = []
+         if job_meta.cluster and job_meta.cluster in _CLUSTER_REGISTRY:
+             cluster_meta = _CLUSTER_REGISTRY[job_meta.cluster]
+             job_clusters.append(
+                 JobCluster(
+                     job_cluster_key=cluster_meta.name,
+                     new_cluster=ClusterSpec.from_dict(cluster_meta.spec), # type: ignore[arg-type] # typed as ClusterSpecDict
+                 )
+             )
+
+         # ----- parameters -------------------------------------------------
+         parameters = [
+             JobParameterDefinition(name=k, default=v)
+             for k, v in job_meta.params.items()
+         ]
+
+         job_obj = Job(
+             name=job_name,
+             tasks=tasks, # type: ignore[arg-type] # SDK Variable wrappers
+             parameters=parameters,
+             job_clusters=job_clusters, # type: ignore[arg-type] # SDK Variable wrappers
+             **job_meta.sdk_config,
+         )
+         jobs[job_name] = job_obj
+
+     return jobs
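As an illustration of the wiring above (not captured deploy output): for the scaffolded ``example_job`` with its single ``hello`` task and a ``url`` job parameter, and assuming the task key is derived from the decorated function's name, the generated ``PythonWheelTask`` would carry roughly these named parameters:

    named_parameters = {
        "__job_name__": "example_job",
        "__task_key__": "hello",
        "__run_id__": "{{job.run_id}}",
        "url": '{{job.parameters["url"]}}',
    }

which is what lets the ``dbxdec-run`` entry point work out which task to execute and which job parameters to expose through ``params``.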
@@ -0,0 +1,31 @@
+ """Runtime context – provides job parameters to task functions.
+
+ At runtime the entry point populates the global ``params`` dict from CLI
+ arguments (parsed via ``argparse``). Task code imports and reads it::
+
+     from databricks_bundle_decorators import params
+
+     @task
+     def my_task():
+         url = params["url"]
+ """
+
+ from typing import Any
+
+
+ class _Params(dict[str, Any]):
+     """Dict subclass that holds job parameters.
+
+     An instance lives at module level and is populated by the runtime
+     runner before the task function is invoked.
+     """
+
+
+ # Global params instance – importable by user code.
+ params: _Params = _Params()
+
+
+ def _populate_params(values: dict[str, Any]) -> None:
+     """Replace the contents of the global *params* dict."""
+     params.clear()
+     params.update(values)
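A minimal sketch of the runtime handshake this module implies; the argument parsing that produces the dict is internal to the runner and not shown in this diff:

    from databricks_bundle_decorators.context import _populate_params, params

    # The runner populates the global dict before invoking the task function...
    _populate_params({"url": "https://example.com"})

    # ...and task code then reads plain dict values, as in the docstring example.
    assert params["url"] == "https://example.com"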