databricks-job-runner 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks_job_runner-0.3.0/PKG-INFO +289 -0
- databricks_job_runner-0.3.0/README.md +270 -0
- databricks_job_runner-0.3.0/pyproject.toml +32 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/__init__.py +16 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/clean.py +40 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/cli.py +160 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/compute.py +188 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/config.py +154 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/errors.py +13 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/logs.py +102 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/py.typed +0 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/runner.py +313 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/submit.py +88 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/upload.py +123 -0
- databricks_job_runner-0.3.0/src/databricks_job_runner/validate.py +52 -0
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: databricks-job-runner
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Reusable CLI for uploading, submitting, validating, fetching logs, and cleaning Databricks job runs
|
|
5
|
+
Author: Ryan Knight
|
|
6
|
+
Author-email: Ryan Knight <ryan.knight@neo4j.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Typing :: Typed
|
|
14
|
+
Requires-Dist: databricks-sdk
|
|
15
|
+
Requires-Dist: pydantic>=2
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Project-URL: Repository, https://github.com/neo4j-partners/databricks-job-runner
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# databricks-job-runner
|
|
21
|
+
|
|
22
|
+
Reusable CLI for uploading, submitting, and cleaning Databricks job runs.
|
|
23
|
+
|
|
24
|
+
Wraps the [Databricks Python SDK](https://docs.databricks.com/dev-tools/sdk-python.html) into a small library that each project configures with a `Runner` instance. One `Runner` gives you five CLI subcommands — `upload`, `submit`, `validate`, `logs`, and `clean` — without writing any Databricks API code in your project.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
uv add databricks-job-runner
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Or with pip:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install databricks-job-runner
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
For local development against a checkout:
|
|
39
|
+
|
|
40
|
+
```toml
|
|
41
|
+
# pyproject.toml
|
|
42
|
+
[tool.uv.sources]
|
|
43
|
+
databricks-job-runner = { path = "../databricks-job-runner", editable = true }
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
> **Warning — do not list `databricks-job-runner` as a core dependency.**
|
|
47
|
+
>
|
|
48
|
+
> `databricks-job-runner` is a **local-only CLI tool** — it is not published to PyPI. If you add it to your project's `[project.dependencies]` (core dependencies), any wheel you build from that project will declare it as a requirement. When Databricks serverless (or any remote environment) tries to install your wheel, pip will fail because it cannot resolve `databricks-job-runner`.
|
|
49
|
+
>
|
|
50
|
+
> Instead, put it in an **optional extras group** so it is only installed locally:
|
|
51
|
+
>
|
|
52
|
+
> ```toml
|
|
53
|
+
> [project.optional-dependencies]
|
|
54
|
+
> cli = ["databricks-job-runner"]
|
|
55
|
+
> ```
|
|
56
|
+
>
|
|
57
|
+
> Then install locally with `uv sync --extra cli` (or `pip install -e '.[cli]'`). Your submitted scripts (e.g. `run_my_package.py`) should never import `databricks_job_runner` — they run on Databricks where it is not available.
|
|
58
|
+
|
|
59
|
+
## Quick start
|
|
60
|
+
|
|
61
|
+
Create a `cli/` package in your project with two files:
|
|
62
|
+
|
|
63
|
+
**`cli/__init__.py`**
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from databricks_job_runner import Runner, RunnerConfig
|
|
67
|
+
|
|
68
|
+
def build_params(config: RunnerConfig, script: str) -> list[str]:
|
|
69
|
+
"""Turn .env values into CLI args for the submitted script."""
|
|
70
|
+
params: list[str] = []
|
|
71
|
+
if config.extras.get("NEO4J_URI") and config.extras.get("NEO4J_PASSWORD"):
|
|
72
|
+
params += ["--neo4j-uri", config.extras["NEO4J_URI"],
|
|
73
|
+
"--neo4j-password", config.extras["NEO4J_PASSWORD"]]
|
|
74
|
+
return params
|
|
75
|
+
|
|
76
|
+
runner = Runner(
|
|
77
|
+
run_name_prefix="my_project",
|
|
78
|
+
build_params=build_params,
|
|
79
|
+
wheel_package="my_package", # optional
|
|
80
|
+
)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**`cli/__main__.py`**
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from cli import runner
|
|
87
|
+
runner.main()
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Then run from the project root:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
python -m cli upload --all # upload agent_modules/*.py
|
|
94
|
+
python -m cli upload test_hello.py # upload a single file
|
|
95
|
+
python -m cli upload --wheel # build and upload wheel
|
|
96
|
+
python -m cli submit test_hello.py # submit a job and wait
|
|
97
|
+
python -m cli submit test_hello.py --no-wait
|
|
98
|
+
python -m cli validate # list remote workspace contents
|
|
99
|
+
python -m cli validate test_hello.py # verify a specific file is uploaded
|
|
100
|
+
python -m cli logs # stdout/stderr from the most recent run
|
|
101
|
+
python -m cli logs 12345 # stdout/stderr from a specific run
|
|
102
|
+
python -m cli clean --yes # clean workspace + runs
|
|
103
|
+
python -m cli clean --runs --yes # clean only runs
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Configuration
|
|
107
|
+
|
|
108
|
+
The runner reads a `.env` file from the project root. Core keys (all prefixed with `DATABRICKS_` for consistency):
|
|
109
|
+
|
|
110
|
+
| Key | Default | Required | Description |
|
|
111
|
+
|-----|---------|----------|-------------|
|
|
112
|
+
| `DATABRICKS_PROFILE` | — | no | CLI profile in `~/.databrickscfg`. When unset, the SDK's unified auth falls back to env vars (`DATABRICKS_HOST`/`DATABRICKS_TOKEN`), Azure CLI, service principals, etc. |
|
|
113
|
+
| `DATABRICKS_COMPUTE_MODE` | `cluster` | no | `cluster` or `serverless`. Selects the compute backend for submitted jobs. |
|
|
114
|
+
| `DATABRICKS_CLUSTER_ID` | — | when `DATABRICKS_COMPUTE_MODE=cluster` | All-purpose cluster to run jobs on. Started automatically if terminated. |
|
|
115
|
+
| `DATABRICKS_SERVERLESS_ENV_VERSION` | `3` | no | Serverless environment version (e.g. `3` for Python 3.12). |
|
|
116
|
+
| `DATABRICKS_WORKSPACE_DIR` | — | yes | Remote workspace path (e.g. `/Users/you/my_project`) |
|
|
117
|
+
| `DATABRICKS_VOLUME_PATH` | — | when using `upload --wheel` | UC Volume path for wheel uploads. |
|
|
118
|
+
|
|
119
|
+
**Precedence:** pre-existing environment variables override `.env` values, matching 12-factor conventions (CI/CD and shell exports can override the file).
|
|
120
|
+
|
|
121
|
+
Additional non-core keys are captured in `RunnerConfig.extras` and passed to your `build_params` callback.
|
|
122
|
+
|
|
123
|
+
### Compute modes
|
|
124
|
+
|
|
125
|
+
- **Classic cluster** (`DATABRICKS_COMPUTE_MODE=cluster`, the default): jobs submit to an existing all-purpose cluster identified by `DATABRICKS_CLUSTER_ID`. The runner auto-starts the cluster if it is terminated, and attaches wheels via `Library(whl=...)`.
|
|
126
|
+
- **Serverless** (`DATABRICKS_COMPUTE_MODE=serverless`): jobs submit to Databricks serverless compute with a job-level environment spec. No cluster ID needed; wheels attach as `Environment.dependencies` entries (UC Volume paths are supported directly).
|
|
127
|
+
|
|
128
|
+
### Example `.env` (classic cluster)
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
DATABRICKS_PROFILE=my-profile
|
|
132
|
+
DATABRICKS_CLUSTER_ID=0123-456789-abcdef
|
|
133
|
+
DATABRICKS_WORKSPACE_DIR=/Users/ryan.knight@example.com/my_project
|
|
134
|
+
DATABRICKS_VOLUME_PATH=/Volumes/catalog/schema/volume
|
|
135
|
+
NEO4J_URI=neo4j+s://abc123.databases.neo4j.io
|
|
136
|
+
NEO4J_PASSWORD=secret
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Example `.env` (serverless)
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
DATABRICKS_PROFILE=my-profile
|
|
143
|
+
DATABRICKS_COMPUTE_MODE=serverless
|
|
144
|
+
DATABRICKS_SERVERLESS_ENV_VERSION=3
|
|
145
|
+
DATABRICKS_WORKSPACE_DIR=/Users/ryan.knight@example.com/my_project
|
|
146
|
+
DATABRICKS_VOLUME_PATH=/Volumes/catalog/schema/volume
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
All `DATABRICKS_*` keys listed above become typed fields on `RunnerConfig`; any other keys (like `NEO4J_URI` above) go into `config.extras`.
|
|
150
|
+
|
|
151
|
+
## API
|
|
152
|
+
|
|
153
|
+
### `Runner`
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
Runner(
|
|
157
|
+
run_name_prefix: str,
|
|
158
|
+
build_params: BuildParamsFn | None = None,
|
|
159
|
+
project_dir: Path | str | None = None,
|
|
160
|
+
wheel_package: str | None = None,
|
|
161
|
+
)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
| Parameter | Description |
|
|
165
|
+
|-----------|-------------|
|
|
166
|
+
| `run_name_prefix` | Prefix for job run names and cleanup filtering |
|
|
167
|
+
| `build_params` | Callback `(config: RunnerConfig, script: str) -> list[str]` that builds CLI args from typed config and the script name |
|
|
168
|
+
| `project_dir` | Project root (defaults to `cwd()`). Must contain `.env` and `agent_modules/` |
|
|
169
|
+
| `wheel_package` | Package name for wheel builds. Enables `upload --wheel`. Wheels upload to `<DATABRICKS_VOLUME_PATH>/wheels/` |
|
|
170
|
+
|
|
171
|
+
### `RunnerConfig`
|
|
172
|
+
|
|
173
|
+
Pydantic model holding parsed `.env` values. Frozen (immutable) after construction.
|
|
174
|
+
|
|
175
|
+
| Field | Type | Description |
|
|
176
|
+
|-------|------|-------------|
|
|
177
|
+
| `databricks_profile` | `str \| None` | CLI profile name, or `None` for unified-auth fallback |
|
|
178
|
+
| `databricks_compute_mode` | `Literal["cluster", "serverless"]` | Compute backend (`"cluster"` by default) |
|
|
179
|
+
| `databricks_cluster_id` | `str \| None` | Cluster ID (required when `databricks_compute_mode == "cluster"`) |
|
|
180
|
+
| `databricks_serverless_env_version` | `str` | Serverless environment version (default `"3"`) |
|
|
181
|
+
| `databricks_workspace_dir` | `str` | Remote workspace root (required) |
|
|
182
|
+
| `databricks_volume_path` | `str \| None` | UC Volume path for wheel uploads |
|
|
183
|
+
| `extras` | `dict[str, str]` | All non-core keys from `.env` |
|
|
184
|
+
|
|
185
|
+
### `BuildParamsFn`
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
type BuildParamsFn = Callable[[RunnerConfig, str], list[str]]
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Type alias for the `build_params` callback. The second argument is the script name being submitted, enabling per-script parameter injection.
|
|
192
|
+
|
|
193
|
+
### `RunnerError`
|
|
194
|
+
|
|
195
|
+
Raised when a runner operation cannot proceed (missing config, file not found, cluster stopped, job failed). The CLI formats and exits; library callers can catch and handle.
|
|
196
|
+
|
|
197
|
+
## Project layout
|
|
198
|
+
|
|
199
|
+
The runner expects this layout in your project:
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
my_project/
|
|
203
|
+
.env
|
|
204
|
+
agent_modules/
|
|
205
|
+
test_hello.py
|
|
206
|
+
run_lab2.py
|
|
207
|
+
...
|
|
208
|
+
cli/
|
|
209
|
+
__init__.py # Runner config
|
|
210
|
+
__main__.py # entry point
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
Scripts in `agent_modules/` are uploaded to `{DATABRICKS_WORKSPACE_DIR}/agent_modules/` on Databricks and submitted as Spark Python tasks.
|
|
214
|
+
|
|
215
|
+
## Subcommands
|
|
216
|
+
|
|
217
|
+
### `upload`
|
|
218
|
+
|
|
219
|
+
- **`upload <file>`** — Upload a single file from `agent_modules/`
|
|
220
|
+
- **`upload --all`** — Upload all `*.py` files from `agent_modules/`
|
|
221
|
+
- **`upload --wheel`** — Build a wheel with `uv build` and upload to the UC Volume (requires `wheel_package` and `DATABRICKS_VOLUME_PATH`)
|
|
222
|
+
|
|
223
|
+
### `submit`
|
|
224
|
+
|
|
225
|
+
- **`submit <script>`** — Submit a script as a one-time Databricks job and wait for completion. Default: `test_hello.py`
|
|
226
|
+
- **`submit <script> --no-wait`** — Submit without waiting
|
|
227
|
+
|
|
228
|
+
On classic mode, if the target cluster is not already `RUNNING`, it is started automatically and the submit waits (up to 20 minutes, the SDK default) for it to reach `RUNNING`. On serverless, no warm-up step is required. When submitting a script named `run_{wheel_package}.py`, the runner automatically attaches the wheel — as a `Library(whl=...)` on classic, or as an `Environment.dependencies` entry on serverless.
|
|
229
|
+
|
|
230
|
+
### `validate`
|
|
231
|
+
|
|
232
|
+
- **`validate`** — List the remote workspace directory and its `agent_modules/` subdirectory. On classic, auto-starts the cluster if needed; on serverless, this is a no-op.
|
|
233
|
+
- **`validate <file>`** — Also verify that `{DATABRICKS_WORKSPACE_DIR}/agent_modules/<file>` exists; exits non-zero if not.
|
|
234
|
+
|
|
235
|
+
### `logs`
|
|
236
|
+
|
|
237
|
+
- **`logs`** — Print stdout/stderr, error, and trace from the most recent run matching `{run_name_prefix}:*`
|
|
238
|
+
- **`logs <run_id>`** — Print output for a specific parent run ID
|
|
239
|
+
|
|
240
|
+
Output is fetched via the Jobs API's `get_run_output`, which returns the **tail 5 MB** of stdout/stderr captured per task (the API caps output size; truncation is signaled in the output). The runner resolves the parent run to its task-level run IDs automatically, so pass the parent `run_id` shown at submit time. Databricks auto-expires runs after 60 days.
|
|
241
|
+
|
|
242
|
+
### `clean`
|
|
243
|
+
|
|
244
|
+
- **`clean`** — Delete the remote workspace directory and all matching job runs
|
|
245
|
+
- **`clean --workspace`** — Delete only the workspace directory
|
|
246
|
+
- **`clean --runs`** — Delete only job runs
|
|
247
|
+
- **`clean --yes`** — Skip confirmation prompt
|
|
248
|
+
|
|
249
|
+
## Requirements
|
|
250
|
+
|
|
251
|
+
- Python 3.12+
|
|
252
|
+
- Databricks authentication: either a [Databricks CLI profile](https://docs.databricks.com/dev-tools/cli/index.html), or env vars (`DATABRICKS_HOST`/`DATABRICKS_TOKEN`), or any other [unified-auth](https://docs.databricks.com/dev-tools/auth/) method
|
|
253
|
+
- Either a Databricks all-purpose cluster (auto-started if terminated) or serverless compute enabled for the workspace
|
|
254
|
+
- [uv](https://docs.astral.sh/uv/) (for wheel building only)
|
|
255
|
+
|
|
256
|
+
## Architecture
|
|
257
|
+
|
|
258
|
+
`databricks-job-runner` is layered into a thin CLI, an orchestrator, and a set of single-purpose action modules. `Runner` is the only class consuming projects need to touch.
|
|
259
|
+
|
|
260
|
+
```
|
|
261
|
+
cli.py argparse + dispatch (flags -> Runner method calls)
|
|
262
|
+
|
|
|
263
|
+
runner.py Runner: holds config, owns the WorkspaceClient,
|
|
264
|
+
| exposes one method per subcommand
|
|
265
|
+
|
|
|
266
|
+
|-- config.py RunnerConfig (frozen pydantic) + .env parser
|
|
267
|
+
|-- compute.py ClassicCluster / Serverless strategies (Protocol)
|
|
268
|
+
|-- upload.py workspace file + wheel upload
|
|
269
|
+
|-- submit.py compute-agnostic job submission
|
|
270
|
+
|-- validate.py workspace listing + file-existence checks
|
|
271
|
+
|-- logs.py per-task stdout/stderr retrieval
|
|
272
|
+
|-- clean.py workspace + run cleanup
|
|
273
|
+
|-- errors.py RunnerError
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Layers
|
|
277
|
+
|
|
278
|
+
- **CLI (`cli.py`)** owns all argparse setup and translates the parsed namespace into method calls on `Runner`. Formats `RunnerError` into friendly exit messages. No argparse knowledge lives outside this file.
|
|
279
|
+
- **Orchestration (`runner.py`)** exposes the `Runner` class. `RunnerConfig` and the `WorkspaceClient` are built lazily on first access, so importing a project's `cli/__init__.py` doesn't touch Databricks. Each public method coordinates a single subcommand end-to-end.
|
|
280
|
+
- **Action modules** (`upload.py`, `submit.py`, `validate.py`, `logs.py`, `clean.py`) are plain functions wrapping SDK calls. None know about argparse or `Runner`, keeping each unit composable and independently testable.
|
|
281
|
+
- **Compute strategies (`compute.py`)** implement the `Compute` protocol. A strategy knows how to (1) validate that its backend is ready, (2) decorate a `SubmitTask` with backend-specific fields, and (3) produce the top-level `environments[]` list for `jobs.submit`. `submit_job` is compute-agnostic — swapping backends is a strategy change, not a conditional branch.
|
|
282
|
+
|
|
283
|
+
### Design choices
|
|
284
|
+
|
|
285
|
+
- **Strategy pattern for compute.** `Compute` is a `typing.Protocol`, so adding a new backend is a new frozen dataclass that matches the shape — no changes to `submit_job`, `Runner`, or the CLI. `ClassicCluster` and `Serverless` are both frozen dataclasses for value-equality and immutability.
|
|
286
|
+
- **Single validation point.** Required-key enforcement lives entirely in `RunnerConfig.from_env_file`, branching on `DATABRICKS_COMPUTE_MODE` (only `DATABRICKS_CLUSTER_ID` is required when mode is `cluster`). Downstream code trusts the config is valid.
|
|
287
|
+
- **`build_params` callback.** Project-specific config stays in the consumer's callback rather than the runner's `.env` schema. Core `DATABRICKS_*` keys are typed on `RunnerConfig`; everything else falls into `RunnerConfig.extras` for the callback to read.
|
|
288
|
+
- **Wheel convention.** A submitted script named exactly `run_{wheel_package}.py` auto-attaches the latest wheel from `dist/` — as `Library(whl=...)` on classic, or an `Environment.dependencies` entry on serverless. Ties `upload --wheel` and `submit run_xxx.py` together without adding a CLI flag.
|
|
289
|
+
- **12-factor `.env`.** Pre-existing env vars override `.env` values, so CI/CD exports and shell overrides trump the file — matching standard `.env` semantics.
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
# databricks-job-runner
|
|
2
|
+
|
|
3
|
+
Reusable CLI for uploading, submitting, and cleaning Databricks job runs.
|
|
4
|
+
|
|
5
|
+
Wraps the [Databricks Python SDK](https://docs.databricks.com/dev-tools/sdk-python.html) into a small library that each project configures with a `Runner` instance. One `Runner` gives you five CLI subcommands — `upload`, `submit`, `validate`, `logs`, and `clean` — without writing any Databricks API code in your project.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
uv add databricks-job-runner
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or with pip:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install databricks-job-runner
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
For local development against a checkout:
|
|
20
|
+
|
|
21
|
+
```toml
|
|
22
|
+
# pyproject.toml
|
|
23
|
+
[tool.uv.sources]
|
|
24
|
+
databricks-job-runner = { path = "../databricks-job-runner", editable = true }
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
> **Warning — do not list `databricks-job-runner` as a core dependency.**
|
|
28
|
+
>
|
|
29
|
+
> `databricks-job-runner` is a **local-only CLI tool** — it is not published to PyPI. If you add it to your project's `[project.dependencies]` (core dependencies), any wheel you build from that project will declare it as a requirement. When Databricks serverless (or any remote environment) tries to install your wheel, pip will fail because it cannot resolve `databricks-job-runner`.
|
|
30
|
+
>
|
|
31
|
+
> Instead, put it in an **optional extras group** so it is only installed locally:
|
|
32
|
+
>
|
|
33
|
+
> ```toml
|
|
34
|
+
> [project.optional-dependencies]
|
|
35
|
+
> cli = ["databricks-job-runner"]
|
|
36
|
+
> ```
|
|
37
|
+
>
|
|
38
|
+
> Then install locally with `uv sync --extra cli` (or `pip install -e '.[cli]'`). Your submitted scripts (e.g. `run_my_package.py`) should never import `databricks_job_runner` — they run on Databricks where it is not available.
|
|
39
|
+
|
|
40
|
+
## Quick start
|
|
41
|
+
|
|
42
|
+
Create a `cli/` package in your project with two files:
|
|
43
|
+
|
|
44
|
+
**`cli/__init__.py`**
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from databricks_job_runner import Runner, RunnerConfig
|
|
48
|
+
|
|
49
|
+
def build_params(config: RunnerConfig, script: str) -> list[str]:
|
|
50
|
+
"""Turn .env values into CLI args for the submitted script."""
|
|
51
|
+
params: list[str] = []
|
|
52
|
+
if config.extras.get("NEO4J_URI") and config.extras.get("NEO4J_PASSWORD"):
|
|
53
|
+
params += ["--neo4j-uri", config.extras["NEO4J_URI"],
|
|
54
|
+
"--neo4j-password", config.extras["NEO4J_PASSWORD"]]
|
|
55
|
+
return params
|
|
56
|
+
|
|
57
|
+
runner = Runner(
|
|
58
|
+
run_name_prefix="my_project",
|
|
59
|
+
build_params=build_params,
|
|
60
|
+
wheel_package="my_package", # optional
|
|
61
|
+
)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**`cli/__main__.py`**
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from cli import runner
|
|
68
|
+
runner.main()
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Then run from the project root:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
python -m cli upload --all # upload agent_modules/*.py
|
|
75
|
+
python -m cli upload test_hello.py # upload a single file
|
|
76
|
+
python -m cli upload --wheel # build and upload wheel
|
|
77
|
+
python -m cli submit test_hello.py # submit a job and wait
|
|
78
|
+
python -m cli submit test_hello.py --no-wait
|
|
79
|
+
python -m cli validate # list remote workspace contents
|
|
80
|
+
python -m cli validate test_hello.py # verify a specific file is uploaded
|
|
81
|
+
python -m cli logs # stdout/stderr from the most recent run
|
|
82
|
+
python -m cli logs 12345 # stdout/stderr from a specific run
|
|
83
|
+
python -m cli clean --yes # clean workspace + runs
|
|
84
|
+
python -m cli clean --runs --yes # clean only runs
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Configuration
|
|
88
|
+
|
|
89
|
+
The runner reads a `.env` file from the project root. Core keys (all prefixed with `DATABRICKS_` for consistency):
|
|
90
|
+
|
|
91
|
+
| Key | Default | Required | Description |
|
|
92
|
+
|-----|---------|----------|-------------|
|
|
93
|
+
| `DATABRICKS_PROFILE` | — | no | CLI profile in `~/.databrickscfg`. When unset, the SDK's unified auth falls back to env vars (`DATABRICKS_HOST`/`DATABRICKS_TOKEN`), Azure CLI, service principals, etc. |
|
|
94
|
+
| `DATABRICKS_COMPUTE_MODE` | `cluster` | no | `cluster` or `serverless`. Selects the compute backend for submitted jobs. |
|
|
95
|
+
| `DATABRICKS_CLUSTER_ID` | — | when `DATABRICKS_COMPUTE_MODE=cluster` | All-purpose cluster to run jobs on. Started automatically if terminated. |
|
|
96
|
+
| `DATABRICKS_SERVERLESS_ENV_VERSION` | `3` | no | Serverless environment version (e.g. `3` for Python 3.12). |
|
|
97
|
+
| `DATABRICKS_WORKSPACE_DIR` | — | yes | Remote workspace path (e.g. `/Users/you/my_project`) |
|
|
98
|
+
| `DATABRICKS_VOLUME_PATH` | — | when using `upload --wheel` | UC Volume path for wheel uploads. |
|
|
99
|
+
|
|
100
|
+
**Precedence:** pre-existing environment variables override `.env` values, matching 12-factor conventions (CI/CD and shell exports can override the file).
|
|
101
|
+
|
|
102
|
+
Additional non-core keys are captured in `RunnerConfig.extras` and passed to your `build_params` callback.
|
|
103
|
+
|
|
104
|
+
### Compute modes
|
|
105
|
+
|
|
106
|
+
- **Classic cluster** (`DATABRICKS_COMPUTE_MODE=cluster`, the default): jobs submit to an existing all-purpose cluster identified by `DATABRICKS_CLUSTER_ID`. The runner auto-starts the cluster if it is terminated, and attaches wheels via `Library(whl=...)`.
|
|
107
|
+
- **Serverless** (`DATABRICKS_COMPUTE_MODE=serverless`): jobs submit to Databricks serverless compute with a job-level environment spec. No cluster ID needed; wheels attach as `Environment.dependencies` entries (UC Volume paths are supported directly).
|
|
108
|
+
|
|
109
|
+
### Example `.env` (classic cluster)
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
DATABRICKS_PROFILE=my-profile
|
|
113
|
+
DATABRICKS_CLUSTER_ID=0123-456789-abcdef
|
|
114
|
+
DATABRICKS_WORKSPACE_DIR=/Users/ryan.knight@example.com/my_project
|
|
115
|
+
DATABRICKS_VOLUME_PATH=/Volumes/catalog/schema/volume
|
|
116
|
+
NEO4J_URI=neo4j+s://abc123.databases.neo4j.io
|
|
117
|
+
NEO4J_PASSWORD=secret
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Example `.env` (serverless)
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
DATABRICKS_PROFILE=my-profile
|
|
124
|
+
DATABRICKS_COMPUTE_MODE=serverless
|
|
125
|
+
DATABRICKS_SERVERLESS_ENV_VERSION=3
|
|
126
|
+
DATABRICKS_WORKSPACE_DIR=/Users/ryan.knight@example.com/my_project
|
|
127
|
+
DATABRICKS_VOLUME_PATH=/Volumes/catalog/schema/volume
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
All `DATABRICKS_*` keys listed above become typed fields on `RunnerConfig`; any other keys (like `NEO4J_URI` above) go into `config.extras`.
|
|
131
|
+
|
|
132
|
+
## API
|
|
133
|
+
|
|
134
|
+
### `Runner`
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
Runner(
|
|
138
|
+
run_name_prefix: str,
|
|
139
|
+
build_params: BuildParamsFn | None = None,
|
|
140
|
+
project_dir: Path | str | None = None,
|
|
141
|
+
wheel_package: str | None = None,
|
|
142
|
+
)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
| Parameter | Description |
|
|
146
|
+
|-----------|-------------|
|
|
147
|
+
| `run_name_prefix` | Prefix for job run names and cleanup filtering |
|
|
148
|
+
| `build_params` | Callback `(config: RunnerConfig, script: str) -> list[str]` that builds CLI args from typed config and the script name |
|
|
149
|
+
| `project_dir` | Project root (defaults to `cwd()`). Must contain `.env` and `agent_modules/` |
|
|
150
|
+
| `wheel_package` | Package name for wheel builds. Enables `upload --wheel`. Wheels upload to `<DATABRICKS_VOLUME_PATH>/wheels/` |
|
|
151
|
+
|
|
152
|
+
### `RunnerConfig`
|
|
153
|
+
|
|
154
|
+
Pydantic model holding parsed `.env` values. Frozen (immutable) after construction.
|
|
155
|
+
|
|
156
|
+
| Field | Type | Description |
|
|
157
|
+
|-------|------|-------------|
|
|
158
|
+
| `databricks_profile` | `str \| None` | CLI profile name, or `None` for unified-auth fallback |
|
|
159
|
+
| `databricks_compute_mode` | `Literal["cluster", "serverless"]` | Compute backend (`"cluster"` by default) |
|
|
160
|
+
| `databricks_cluster_id` | `str \| None` | Cluster ID (required when `databricks_compute_mode == "cluster"`) |
|
|
161
|
+
| `databricks_serverless_env_version` | `str` | Serverless environment version (default `"3"`) |
|
|
162
|
+
| `databricks_workspace_dir` | `str` | Remote workspace root (required) |
|
|
163
|
+
| `databricks_volume_path` | `str \| None` | UC Volume path for wheel uploads |
|
|
164
|
+
| `extras` | `dict[str, str]` | All non-core keys from `.env` |
|
|
165
|
+
|
|
166
|
+
### `BuildParamsFn`
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
type BuildParamsFn = Callable[[RunnerConfig, str], list[str]]
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Type alias for the `build_params` callback. The second argument is the script name being submitted, enabling per-script parameter injection.
|
|
173
|
+
|
|
174
|
+
### `RunnerError`
|
|
175
|
+
|
|
176
|
+
Raised when a runner operation cannot proceed (missing config, file not found, cluster stopped, job failed). The CLI formats and exits; library callers can catch and handle.
|
|
177
|
+
|
|
178
|
+
## Project layout
|
|
179
|
+
|
|
180
|
+
The runner expects this layout in your project:
|
|
181
|
+
|
|
182
|
+
```
|
|
183
|
+
my_project/
|
|
184
|
+
.env
|
|
185
|
+
agent_modules/
|
|
186
|
+
test_hello.py
|
|
187
|
+
run_lab2.py
|
|
188
|
+
...
|
|
189
|
+
cli/
|
|
190
|
+
__init__.py # Runner config
|
|
191
|
+
__main__.py # entry point
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Scripts in `agent_modules/` are uploaded to `{DATABRICKS_WORKSPACE_DIR}/agent_modules/` on Databricks and submitted as Spark Python tasks.
|
|
195
|
+
|
|
196
|
+
## Subcommands
|
|
197
|
+
|
|
198
|
+
### `upload`
|
|
199
|
+
|
|
200
|
+
- **`upload <file>`** — Upload a single file from `agent_modules/`
|
|
201
|
+
- **`upload --all`** — Upload all `*.py` files from `agent_modules/`
|
|
202
|
+
- **`upload --wheel`** — Build a wheel with `uv build` and upload to the UC Volume (requires `wheel_package` and `DATABRICKS_VOLUME_PATH`)
|
|
203
|
+
|
|
204
|
+
### `submit`
|
|
205
|
+
|
|
206
|
+
- **`submit <script>`** — Submit a script as a one-time Databricks job and wait for completion. Default: `test_hello.py`
|
|
207
|
+
- **`submit <script> --no-wait`** — Submit without waiting
|
|
208
|
+
|
|
209
|
+
On classic mode, if the target cluster is not already `RUNNING`, it is started automatically and the submit waits (up to 20 minutes, the SDK default) for it to reach `RUNNING`. On serverless, no warm-up step is required. When submitting a script named `run_{wheel_package}.py`, the runner automatically attaches the wheel — as a `Library(whl=...)` on classic, or as an `Environment.dependencies` entry on serverless.
|
|
210
|
+
|
|
211
|
+
### `validate`
|
|
212
|
+
|
|
213
|
+
- **`validate`** — List the remote workspace directory and its `agent_modules/` subdirectory. On classic, auto-starts the cluster if needed; on serverless, this is a no-op.
|
|
214
|
+
- **`validate <file>`** — Also verify that `{DATABRICKS_WORKSPACE_DIR}/agent_modules/<file>` exists; exits non-zero if not.
|
|
215
|
+
|
|
216
|
+
### `logs`
|
|
217
|
+
|
|
218
|
+
- **`logs`** — Print stdout/stderr, error, and trace from the most recent run matching `{run_name_prefix}:*`
|
|
219
|
+
- **`logs <run_id>`** — Print output for a specific parent run ID
|
|
220
|
+
|
|
221
|
+
Output is fetched via the Jobs API's `get_run_output`, which returns the **tail 5 MB** of stdout/stderr captured per task (the API caps output size; truncation is signaled in the output). The runner resolves the parent run to its task-level run IDs automatically, so pass the parent `run_id` shown at submit time. Databricks auto-expires runs after 60 days.
|
|
222
|
+
|
|
223
|
+
### `clean`
|
|
224
|
+
|
|
225
|
+
- **`clean`** — Delete the remote workspace directory and all matching job runs
|
|
226
|
+
- **`clean --workspace`** — Delete only the workspace directory
|
|
227
|
+
- **`clean --runs`** — Delete only job runs
|
|
228
|
+
- **`clean --yes`** — Skip confirmation prompt
|
|
229
|
+
|
|
230
|
+
## Requirements
|
|
231
|
+
|
|
232
|
+
- Python 3.12+
|
|
233
|
+
- Databricks authentication: either a [Databricks CLI profile](https://docs.databricks.com/dev-tools/cli/index.html), or env vars (`DATABRICKS_HOST`/`DATABRICKS_TOKEN`), or any other [unified-auth](https://docs.databricks.com/dev-tools/auth/) method
|
|
234
|
+
- Either a Databricks all-purpose cluster (auto-started if terminated) or serverless compute enabled for the workspace
|
|
235
|
+
- [uv](https://docs.astral.sh/uv/) (for wheel building only)
|
|
236
|
+
|
|
237
|
+
## Architecture
|
|
238
|
+
|
|
239
|
+
`databricks-job-runner` is layered into a thin CLI, an orchestrator, and a set of single-purpose action modules. `Runner` is the only class consuming projects need to touch.
|
|
240
|
+
|
|
241
|
+
```
|
|
242
|
+
cli.py argparse + dispatch (flags -> Runner method calls)
|
|
243
|
+
|
|
|
244
|
+
runner.py Runner: holds config, owns the WorkspaceClient,
|
|
245
|
+
| exposes one method per subcommand
|
|
246
|
+
|
|
|
247
|
+
|-- config.py RunnerConfig (frozen pydantic) + .env parser
|
|
248
|
+
|-- compute.py ClassicCluster / Serverless strategies (Protocol)
|
|
249
|
+
|-- upload.py workspace file + wheel upload
|
|
250
|
+
|-- submit.py compute-agnostic job submission
|
|
251
|
+
|-- validate.py workspace listing + file-existence checks
|
|
252
|
+
|-- logs.py per-task stdout/stderr retrieval
|
|
253
|
+
|-- clean.py workspace + run cleanup
|
|
254
|
+
|-- errors.py RunnerError
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### Layers
|
|
258
|
+
|
|
259
|
+
- **CLI (`cli.py`)** owns all argparse setup and translates the parsed namespace into method calls on `Runner`. Formats `RunnerError` into friendly exit messages. No argparse knowledge lives outside this file.
|
|
260
|
+
- **Orchestration (`runner.py`)** exposes the `Runner` class. `RunnerConfig` and the `WorkspaceClient` are built lazily on first access, so importing a project's `cli/__init__.py` doesn't touch Databricks. Each public method coordinates a single subcommand end-to-end.
|
|
261
|
+
- **Action modules** (`upload.py`, `submit.py`, `validate.py`, `logs.py`, `clean.py`) are plain functions wrapping SDK calls. None know about argparse or `Runner`, keeping each unit composable and independently testable.
|
|
262
|
+
- **Compute strategies (`compute.py`)** implement the `Compute` protocol. A strategy knows how to (1) validate that its backend is ready, (2) decorate a `SubmitTask` with backend-specific fields, and (3) produce the top-level `environments[]` list for `jobs.submit`. `submit_job` is compute-agnostic — swapping backends is a strategy change, not a conditional branch.
|
|
263
|
+
|
|
264
|
+
### Design choices
|
|
265
|
+
|
|
266
|
+
- **Strategy pattern for compute.** `Compute` is a `typing.Protocol`, so adding a new backend is a new frozen dataclass that matches the shape — no changes to `submit_job`, `Runner`, or the CLI. `ClassicCluster` and `Serverless` are both frozen dataclasses for value-equality and immutability.
|
|
267
|
+
- **Single validation point.** Required-key enforcement lives entirely in `RunnerConfig.from_env_file`, branching on `DATABRICKS_COMPUTE_MODE` (only `DATABRICKS_CLUSTER_ID` is required when mode is `cluster`). Downstream code trusts the config is valid.
|
|
268
|
+
- **`build_params` callback.** Project-specific config stays in the consumer's callback rather than the runner's `.env` schema. Core `DATABRICKS_*` keys are typed on `RunnerConfig`; everything else falls into `RunnerConfig.extras` for the callback to read.
|
|
269
|
+
- **Wheel convention.** A submitted script named exactly `run_{wheel_package}.py` auto-attaches the latest wheel from `dist/` — as `Library(whl=...)` on classic, or an `Environment.dependencies` entry on serverless. Ties `upload --wheel` and `submit run_xxx.py` together without adding a CLI flag.
|
|
270
|
+
- **12-factor `.env`.** Pre-existing env vars override `.env` values, so CI/CD exports and shell overrides trump the file — matching standard `.env` semantics.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "databricks-job-runner"
|
|
3
|
+
version = "0.3.0"
|
|
4
|
+
description = "Reusable CLI for uploading, submitting, validating, fetching logs, and cleaning Databricks job runs"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Ryan Knight", email = "ryan.knight@neo4j.com" }
|
|
9
|
+
]
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"databricks-sdk",
|
|
13
|
+
"pydantic>=2",
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Typing :: Typed",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.urls]
|
|
25
|
+
Repository = "https://github.com/neo4j-partners/databricks-job-runner"
|
|
26
|
+
|
|
27
|
+
[build-system]
|
|
28
|
+
requires = ["uv_build>=0.9.3,<0.10.0"]
|
|
29
|
+
build-backend = "uv_build"
|
|
30
|
+
|
|
31
|
+
[tool.uv.workspace]
|
|
32
|
+
members = ["examples/serverless_smoke"]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Reusable CLI for uploading, submitting, validating, fetching logs, and cleaning Databricks job runs."""
|
|
2
|
+
|
|
3
|
+
from databricks_job_runner.compute import ClassicCluster, Compute, Serverless
|
|
4
|
+
from databricks_job_runner.config import RunnerConfig
|
|
5
|
+
from databricks_job_runner.errors import RunnerError
|
|
6
|
+
from databricks_job_runner.runner import BuildParamsFn, Runner
|
|
7
|
+
|
|
8
|
+
# Names re-exported as the package's public API (kept sorted alphabetically).
__all__ = [
    "BuildParamsFn",
    "ClassicCluster",
    "Compute",
    "Runner",
    "RunnerConfig",
    "RunnerError",
    "Serverless",
]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Clean up remote workspace directories and job runs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from databricks.sdk import WorkspaceClient
|
|
6
|
+
from databricks.sdk.errors import NotFound
|
|
7
|
+
from databricks.sdk.service.jobs import RunType
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def clean_workspace(ws: WorkspaceClient, workspace_dir: str) -> None:
    """Recursively delete *workspace_dir* from the remote Databricks workspace.

    A missing (or already-deleted) directory is not an error; it is reported
    and ignored.
    """
    print(f"Deleting remote workspace: {workspace_dir}")
    try:
        ws.workspace.delete(path=workspace_dir, recursive=True)
    except NotFound:
        # Nothing to remove — treat as a successful no-op.
        print(" Directory does not exist or already deleted.")
    else:
        print(" Done.")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def clean_runs(ws: WorkspaceClient, run_name_prefix: str) -> None:
|
|
21
|
+
"""Find and delete all one-time job runs whose name starts with *prefix*."""
|
|
22
|
+
print(f"Finding job runs matching '{run_name_prefix}*'...")
|
|
23
|
+
|
|
24
|
+
deleted = 0
|
|
25
|
+
for run in ws.jobs.list_runs(run_type=RunType.SUBMIT_RUN, expand_tasks=False):
|
|
26
|
+
run_name = run.run_name or ""
|
|
27
|
+
run_id = run.run_id
|
|
28
|
+
if run_id is None or not run_name.startswith(run_name_prefix):
|
|
29
|
+
continue
|
|
30
|
+
print(f" Deleting run {run_id} ({run_name})")
|
|
31
|
+
try:
|
|
32
|
+
ws.jobs.delete_run(run_id)
|
|
33
|
+
deleted += 1
|
|
34
|
+
except NotFound:
|
|
35
|
+
print(f" Run {run_id} already deleted.")
|
|
36
|
+
|
|
37
|
+
if deleted:
|
|
38
|
+
print(f" Deleted {deleted} run(s).")
|
|
39
|
+
else:
|
|
40
|
+
print(" No matching runs found.")
|