databricks-job-runner 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,289 @@
1
+ Metadata-Version: 2.4
2
+ Name: databricks-job-runner
3
+ Version: 0.3.0
4
+ Summary: Reusable CLI for uploading, submitting, validating, fetching logs, and cleaning Databricks job runs
5
+ Author: Ryan Knight
6
+ Author-email: Ryan Knight <ryan.knight@neo4j.com>
7
+ License-Expression: MIT
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Typing :: Typed
14
+ Requires-Dist: databricks-sdk
15
+ Requires-Dist: pydantic>=2
16
+ Requires-Python: >=3.12
17
+ Project-URL: Repository, https://github.com/neo4j-partners/databricks-job-runner
18
+ Description-Content-Type: text/markdown
19
+
20
+ # databricks-job-runner
21
+
22
+ Reusable CLI for uploading, submitting, and cleaning Databricks job runs.
23
+
24
+ Wraps the [Databricks Python SDK](https://docs.databricks.com/dev-tools/sdk-python.html) into a small library that each project configures with a `Runner` instance. One `Runner` gives you five CLI subcommands — `upload`, `submit`, `validate`, `logs`, and `clean` — without writing any Databricks API code in your project.
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ uv add databricks-job-runner
30
+ ```
31
+
32
+ Or with pip:
33
+
34
+ ```bash
35
+ pip install databricks-job-runner
36
+ ```
37
+
38
+ For local development against a checkout:
39
+
40
+ ```toml
41
+ # pyproject.toml
42
+ [tool.uv.sources]
43
+ databricks-job-runner = { path = "../databricks-job-runner", editable = true }
44
+ ```
45
+
46
+ > **Warning — do not list `databricks-job-runner` as a core dependency.**
47
+ >
48
+ > `databricks-job-runner` is a **local-only CLI tool** — it is not published to PyPI. If you add it to your project's `[project.dependencies]` (core dependencies), any wheel you build from that project will declare it as a requirement. When Databricks serverless (or any remote environment) tries to install your wheel, pip will fail because it cannot resolve `databricks-job-runner`.
49
+ >
50
+ > Instead, put it in an **optional extras group** so it is only installed locally:
51
+ >
52
+ > ```toml
53
+ > [project.optional-dependencies]
54
+ > cli = ["databricks-job-runner"]
55
+ > ```
56
+ >
57
+ > Then install locally with `uv sync --extra cli` (or `pip install -e '.[cli]'`). Your submitted scripts (e.g. `run_my_package.py`) should never import `databricks_job_runner` — they run on Databricks where it is not available.
58
+
59
+ ## Quick start
60
+
61
+ Create a `cli/` package in your project with two files:
62
+
63
+ **`cli/__init__.py`**
64
+
65
+ ```python
66
+ from databricks_job_runner import Runner, RunnerConfig
67
+
68
+ def build_params(config: RunnerConfig, script: str) -> list[str]:
69
+ """Turn .env values into CLI args for the submitted script."""
70
+ params: list[str] = []
71
+ if config.extras.get("NEO4J_URI") and config.extras.get("NEO4J_PASSWORD"):
72
+ params += ["--neo4j-uri", config.extras["NEO4J_URI"],
73
+ "--neo4j-password", config.extras["NEO4J_PASSWORD"]]
74
+ return params
75
+
76
+ runner = Runner(
77
+ run_name_prefix="my_project",
78
+ build_params=build_params,
79
+ wheel_package="my_package", # optional
80
+ )
81
+ ```
82
+
83
+ **`cli/__main__.py`**
84
+
85
+ ```python
86
+ from cli import runner
87
+ runner.main()
88
+ ```
89
+
90
+ Then run from the project root:
91
+
92
+ ```bash
93
+ python -m cli upload --all # upload agent_modules/*.py
94
+ python -m cli upload test_hello.py # upload a single file
95
+ python -m cli upload --wheel # build and upload wheel
96
+ python -m cli submit test_hello.py # submit a job and wait
97
+ python -m cli submit test_hello.py --no-wait
98
+ python -m cli validate # list remote workspace contents
99
+ python -m cli validate test_hello.py # verify a specific file is uploaded
100
+ python -m cli logs # stdout/stderr from the most recent run
101
+ python -m cli logs 12345 # stdout/stderr from a specific run
102
+ python -m cli clean --yes # clean workspace + runs
103
+ python -m cli clean --runs --yes # clean only runs
104
+ ```
105
+
106
+ ## Configuration
107
+
108
+ The runner reads a `.env` file from the project root. Core keys (all prefixed with `DATABRICKS_` for consistency):
109
+
110
+ | Key | Default | Required | Description |
111
+ |-----|---------|----------|-------------|
112
+ | `DATABRICKS_PROFILE` | — | no | CLI profile in `~/.databrickscfg`. When unset, the SDK's unified auth falls back to env vars (`DATABRICKS_HOST`/`DATABRICKS_TOKEN`), Azure CLI, service principals, etc. |
113
+ | `DATABRICKS_COMPUTE_MODE` | `cluster` | no | `cluster` or `serverless`. Selects the compute backend for submitted jobs. |
114
+ | `DATABRICKS_CLUSTER_ID` | — | when `DATABRICKS_COMPUTE_MODE=cluster` | All-purpose cluster to run jobs on. Started automatically if terminated. |
115
+ | `DATABRICKS_SERVERLESS_ENV_VERSION` | `3` | no | Serverless environment version (e.g. `3` for Python 3.12). |
116
+ | `DATABRICKS_WORKSPACE_DIR` | — | yes | Remote workspace path (e.g. `/Users/you/my_project`) |
117
+ | `DATABRICKS_VOLUME_PATH` | — | when using `upload --wheel` | UC Volume path for wheel uploads. |
118
+
119
+ **Precedence:** pre-existing environment variables override `.env` values, matching 12-factor conventions (CI/CD and shell exports can override the file).
120
+
121
+ Additional non-core keys are captured in `RunnerConfig.extras` and passed to your `build_params` callback.
122
+
123
+ ### Compute modes
124
+
125
+ - **Classic cluster** (`DATABRICKS_COMPUTE_MODE=cluster`, the default): jobs submit to an existing all-purpose cluster identified by `DATABRICKS_CLUSTER_ID`. The runner auto-starts the cluster if it is terminated, and attaches wheels via `Library(whl=...)`.
126
+ - **Serverless** (`DATABRICKS_COMPUTE_MODE=serverless`): jobs submit to Databricks serverless compute with a job-level environment spec. No cluster ID needed; wheels attach as `Environment.dependencies` entries (UC Volume paths are supported directly).
127
+
128
+ ### Example `.env` (classic cluster)
129
+
130
+ ```
131
+ DATABRICKS_PROFILE=my-profile
132
+ DATABRICKS_CLUSTER_ID=0123-456789-abcdef
133
+ DATABRICKS_WORKSPACE_DIR=/Users/ryan.knight@example.com/my_project
134
+ DATABRICKS_VOLUME_PATH=/Volumes/catalog/schema/volume
135
+ NEO4J_URI=neo4j+s://abc123.databases.neo4j.io
136
+ NEO4J_PASSWORD=secret
137
+ ```
138
+
139
+ ### Example `.env` (serverless)
140
+
141
+ ```
142
+ DATABRICKS_PROFILE=my-profile
143
+ DATABRICKS_COMPUTE_MODE=serverless
144
+ DATABRICKS_SERVERLESS_ENV_VERSION=3
145
+ DATABRICKS_WORKSPACE_DIR=/Users/ryan.knight@example.com/my_project
146
+ DATABRICKS_VOLUME_PATH=/Volumes/catalog/schema/volume
147
+ ```
148
+
149
+ All `DATABRICKS_*` keys listed above become typed fields on `RunnerConfig`; any other keys (like `NEO4J_URI` above) go into `config.extras`.
150
+
151
+ ## API
152
+
153
+ ### `Runner`
154
+
155
+ ```python
156
+ Runner(
157
+ run_name_prefix: str,
158
+ build_params: BuildParamsFn | None = None,
159
+ project_dir: Path | str | None = None,
160
+ wheel_package: str | None = None,
161
+ )
162
+ ```
163
+
164
+ | Parameter | Description |
165
+ |-----------|-------------|
166
+ | `run_name_prefix` | Prefix for job run names and cleanup filtering |
167
+ | `build_params` | Callback `(config: RunnerConfig, script: str) -> list[str]` that builds CLI args from typed config |
168
+ | `project_dir` | Project root (defaults to `cwd()`). Must contain `.env` and `agent_modules/` |
169
+ | `wheel_package` | Package name for wheel builds. Enables `upload --wheel`. Wheels upload to `<DATABRICKS_VOLUME_PATH>/wheels/` |
170
+
171
+ ### `RunnerConfig`
172
+
173
+ Pydantic model holding parsed `.env` values. Frozen (immutable) after construction.
174
+
175
+ | Field | Type | Description |
176
+ |-------|------|-------------|
177
+ | `databricks_profile` | `str \| None` | CLI profile name, or `None` for unified-auth fallback |
178
+ | `databricks_compute_mode` | `Literal["cluster", "serverless"]` | Compute backend (`"cluster"` by default) |
179
+ | `databricks_cluster_id` | `str \| None` | Cluster ID (required when `databricks_compute_mode == "cluster"`) |
180
+ | `databricks_serverless_env_version` | `str` | Serverless environment version (default `"3"`) |
181
+ | `databricks_workspace_dir` | `str` | Remote workspace root (required) |
182
+ | `databricks_volume_path` | `str \| None` | UC Volume path for wheel uploads |
183
+ | `extras` | `dict[str, str]` | All non-core keys from `.env` |
184
+
185
+ ### `BuildParamsFn`
186
+
187
+ ```python
188
+ type BuildParamsFn = Callable[[RunnerConfig, str], list[str]]
189
+ ```
190
+
191
+ Type alias for the `build_params` callback. The second argument is the script name being submitted, enabling per-script parameter injection.
192
+
193
+ ### `RunnerError`
194
+
195
+ Raised when a runner operation cannot proceed (missing config, file not found, cluster stopped, job failed). The CLI formats and exits; library callers can catch and handle.
196
+
197
+ ## Project layout
198
+
199
+ The runner expects this layout in your project:
200
+
201
+ ```
202
+ my_project/
203
+ .env
204
+ agent_modules/
205
+ test_hello.py
206
+ run_lab2.py
207
+ ...
208
+ cli/
209
+ __init__.py # Runner config
210
+ __main__.py # entry point
211
+ ```
212
+
213
+ Scripts in `agent_modules/` are uploaded to `{DATABRICKS_WORKSPACE_DIR}/agent_modules/` on Databricks and submitted as Spark Python tasks.
214
+
215
+ ## Subcommands
216
+
217
+ ### `upload`
218
+
219
+ - **`upload <file>`** — Upload a single file from `agent_modules/`
220
+ - **`upload --all`** — Upload all `*.py` files from `agent_modules/`
221
+ - **`upload --wheel`** — Build a wheel with `uv build` and upload to the UC Volume (requires `wheel_package` and `DATABRICKS_VOLUME_PATH`)
222
+
223
+ ### `submit`
224
+
225
+ - **`submit <script>`** — Submit a script as a one-time Databricks job and wait for completion. Default: `test_hello.py`
226
+ - **`submit <script> --no-wait`** — Submit without waiting
227
+
228
+ On classic mode, if the target cluster is not already `RUNNING`, it is started automatically and the submit waits (up to 20 minutes, the SDK default) for it to reach `RUNNING`. On serverless, no warm-up step is required. When submitting a script named `run_{wheel_package}.py`, the runner automatically attaches the wheel — as a `Library(whl=...)` on classic, or as an `Environment.dependencies` entry on serverless.
229
+
230
+ ### `validate`
231
+
232
+ - **`validate`** — List the remote workspace directory and its `agent_modules/` subdirectory. On classic, auto-starts the cluster if needed; on serverless, this is a no-op.
233
+ - **`validate <file>`** — Also verify that `{DATABRICKS_WORKSPACE_DIR}/agent_modules/<file>` exists; exits non-zero if not.
234
+
235
+ ### `logs`
236
+
237
+ - **`logs`** — Print stdout/stderr, error, and trace from the most recent run matching `{run_name_prefix}:*`
238
+ - **`logs <run_id>`** — Print output for a specific parent run ID
239
+
240
+ Output is fetched via the Jobs API's `get_run_output`, which returns the **tail 5 MB** of stdout/stderr captured per task (the API caps output size; truncation is signaled in the output). The runner resolves the parent run to its task-level run IDs automatically, so pass the parent `run_id` shown at submit time. Databricks auto-expires runs after 60 days.
241
+
242
+ ### `clean`
243
+
244
+ - **`clean`** — Delete the remote workspace directory and all matching job runs
245
+ - **`clean --workspace`** — Delete only the workspace directory
246
+ - **`clean --runs`** — Delete only job runs
247
+ - **`clean --yes`** — Skip confirmation prompt
248
+
249
+ ## Requirements
250
+
251
+ - Python 3.12+
252
+ - Databricks authentication: either a [Databricks CLI profile](https://docs.databricks.com/dev-tools/cli/index.html), or env vars (`DATABRICKS_HOST`/`DATABRICKS_TOKEN`), or any other [unified-auth](https://docs.databricks.com/dev-tools/auth/) method
253
+ - Either a Databricks all-purpose cluster (auto-started if terminated) or serverless compute enabled for the workspace
254
+ - [uv](https://docs.astral.sh/uv/) (for wheel building only)
255
+
256
+ ## Architecture
257
+
258
+ `databricks-job-runner` is layered into a thin CLI, an orchestrator, and a set of single-purpose action modules. `Runner` is the only class that consuming projects need to touch.
259
+
260
+ ```
261
+ cli.py argparse + dispatch (flags -> Runner method calls)
262
+ |
263
+ runner.py Runner: holds config, owns the WorkspaceClient,
264
+ | exposes one method per subcommand
265
+ |
266
+ |-- config.py RunnerConfig (frozen pydantic) + .env parser
267
+ |-- compute.py ClassicCluster / Serverless strategies (Protocol)
268
+ |-- upload.py workspace file + wheel upload
269
+ |-- submit.py compute-agnostic job submission
270
+ |-- validate.py workspace listing + file-existence checks
271
+ |-- logs.py per-task stdout/stderr retrieval
272
+ |-- clean.py workspace + run cleanup
273
+ |-- errors.py RunnerError
274
+ ```
275
+
276
+ ### Layers
277
+
278
+ - **CLI (`cli.py`)** owns all argparse setup and translates the parsed namespace into method calls on `Runner`. Formats `RunnerError` into friendly exit messages. No argparse knowledge lives outside this file.
279
+ - **Orchestration (`runner.py`)** exposes the `Runner` class. `RunnerConfig` and the `WorkspaceClient` are built lazily on first access, so importing a project's `cli/__init__.py` doesn't touch Databricks. Each public method coordinates a single subcommand end-to-end.
280
+ - **Action modules** (`upload.py`, `submit.py`, `validate.py`, `logs.py`, `clean.py`) are plain functions wrapping SDK calls. None know about argparse or `Runner`, keeping each unit composable and independently testable.
281
+ - **Compute strategies (`compute.py`)** implement the `Compute` protocol. A strategy knows how to (1) validate that its backend is ready, (2) decorate a `SubmitTask` with backend-specific fields, and (3) produce the top-level `environments[]` list for `jobs.submit`. `submit_job` is compute-agnostic — swapping backends is a strategy change, not a conditional branch.
282
+
283
+ ### Design choices
284
+
285
+ - **Strategy pattern for compute.** `Compute` is a `typing.Protocol`, so adding a new backend is a new frozen dataclass that matches the shape — no changes to `submit_job`, `Runner`, or the CLI. `ClassicCluster` and `Serverless` are both frozen dataclasses for value-equality and immutability.
286
+ - **Single validation point.** Required-key enforcement lives entirely in `RunnerConfig.from_env_file`, branching on `DATABRICKS_COMPUTE_MODE` (only `DATABRICKS_CLUSTER_ID` is required when mode is `cluster`). Downstream code trusts the config is valid.
287
+ - **`build_params` callback.** Project-specific config stays in the consumer's callback rather than the runner's `.env` schema. Core `DATABRICKS_*` keys are typed on `RunnerConfig`; everything else falls into `RunnerConfig.extras` for the callback to read.
288
+ - **Wheel convention.** A submitted script named exactly `run_{wheel_package}.py` auto-attaches the latest wheel from `dist/` — as `Library(whl=...)` on classic, or an `Environment.dependencies` entry on serverless. Ties `upload --wheel` and `submit run_xxx.py` together without adding a CLI flag.
289
+ - **12-factor `.env`.** Pre-existing env vars override `.env` values, so CI/CD exports and shell overrides trump the file — matching standard `.env` semantics.
@@ -0,0 +1,270 @@
1
+ # databricks-job-runner
2
+
3
+ Reusable CLI for uploading, submitting, and cleaning Databricks job runs.
4
+
5
+ Wraps the [Databricks Python SDK](https://docs.databricks.com/dev-tools/sdk-python.html) into a small library that each project configures with a `Runner` instance. One `Runner` gives you five CLI subcommands — `upload`, `submit`, `validate`, `logs`, and `clean` — without writing any Databricks API code in your project.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ uv add databricks-job-runner
11
+ ```
12
+
13
+ Or with pip:
14
+
15
+ ```bash
16
+ pip install databricks-job-runner
17
+ ```
18
+
19
+ For local development against a checkout:
20
+
21
+ ```toml
22
+ # pyproject.toml
23
+ [tool.uv.sources]
24
+ databricks-job-runner = { path = "../databricks-job-runner", editable = true }
25
+ ```
26
+
27
+ > **Warning — do not list `databricks-job-runner` as a core dependency.**
28
+ >
29
+ > `databricks-job-runner` is a **local-only CLI tool** — it is not published to PyPI. If you add it to your project's `[project.dependencies]` (core dependencies), any wheel you build from that project will declare it as a requirement. When Databricks serverless (or any remote environment) tries to install your wheel, pip will fail because it cannot resolve `databricks-job-runner`.
30
+ >
31
+ > Instead, put it in an **optional extras group** so it is only installed locally:
32
+ >
33
+ > ```toml
34
+ > [project.optional-dependencies]
35
+ > cli = ["databricks-job-runner"]
36
+ > ```
37
+ >
38
+ > Then install locally with `uv sync --extra cli` (or `pip install -e '.[cli]'`). Your submitted scripts (e.g. `run_my_package.py`) should never import `databricks_job_runner` — they run on Databricks where it is not available.
39
+
40
+ ## Quick start
41
+
42
+ Create a `cli/` package in your project with two files:
43
+
44
+ **`cli/__init__.py`**
45
+
46
+ ```python
47
+ from databricks_job_runner import Runner, RunnerConfig
48
+
49
+ def build_params(config: RunnerConfig, script: str) -> list[str]:
50
+ """Turn .env values into CLI args for the submitted script."""
51
+ params: list[str] = []
52
+ if config.extras.get("NEO4J_URI") and config.extras.get("NEO4J_PASSWORD"):
53
+ params += ["--neo4j-uri", config.extras["NEO4J_URI"],
54
+ "--neo4j-password", config.extras["NEO4J_PASSWORD"]]
55
+ return params
56
+
57
+ runner = Runner(
58
+ run_name_prefix="my_project",
59
+ build_params=build_params,
60
+ wheel_package="my_package", # optional
61
+ )
62
+ ```
63
+
64
+ **`cli/__main__.py`**
65
+
66
+ ```python
67
+ from cli import runner
68
+ runner.main()
69
+ ```
70
+
71
+ Then run from the project root:
72
+
73
+ ```bash
74
+ python -m cli upload --all # upload agent_modules/*.py
75
+ python -m cli upload test_hello.py # upload a single file
76
+ python -m cli upload --wheel # build and upload wheel
77
+ python -m cli submit test_hello.py # submit a job and wait
78
+ python -m cli submit test_hello.py --no-wait
79
+ python -m cli validate # list remote workspace contents
80
+ python -m cli validate test_hello.py # verify a specific file is uploaded
81
+ python -m cli logs # stdout/stderr from the most recent run
82
+ python -m cli logs 12345 # stdout/stderr from a specific run
83
+ python -m cli clean --yes # clean workspace + runs
84
+ python -m cli clean --runs --yes # clean only runs
85
+ ```
86
+
87
+ ## Configuration
88
+
89
+ The runner reads a `.env` file from the project root. Core keys (all prefixed with `DATABRICKS_` for consistency):
90
+
91
+ | Key | Default | Required | Description |
92
+ |-----|---------|----------|-------------|
93
+ | `DATABRICKS_PROFILE` | — | no | CLI profile in `~/.databrickscfg`. When unset, the SDK's unified auth falls back to env vars (`DATABRICKS_HOST`/`DATABRICKS_TOKEN`), Azure CLI, service principals, etc. |
94
+ | `DATABRICKS_COMPUTE_MODE` | `cluster` | no | `cluster` or `serverless`. Selects the compute backend for submitted jobs. |
95
+ | `DATABRICKS_CLUSTER_ID` | — | when `DATABRICKS_COMPUTE_MODE=cluster` | All-purpose cluster to run jobs on. Started automatically if terminated. |
96
+ | `DATABRICKS_SERVERLESS_ENV_VERSION` | `3` | no | Serverless environment version (e.g. `3` for Python 3.12). |
97
+ | `DATABRICKS_WORKSPACE_DIR` | — | yes | Remote workspace path (e.g. `/Users/you/my_project`) |
98
+ | `DATABRICKS_VOLUME_PATH` | — | when using `upload --wheel` | UC Volume path for wheel uploads. |
99
+
100
+ **Precedence:** pre-existing environment variables override `.env` values, matching 12-factor conventions (CI/CD and shell exports can override the file).
101
+
102
+ Additional non-core keys are captured in `RunnerConfig.extras` and passed to your `build_params` callback.
103
+
104
+ ### Compute modes
105
+
106
+ - **Classic cluster** (`DATABRICKS_COMPUTE_MODE=cluster`, the default): jobs submit to an existing all-purpose cluster identified by `DATABRICKS_CLUSTER_ID`. The runner auto-starts the cluster if it is terminated, and attaches wheels via `Library(whl=...)`.
107
+ - **Serverless** (`DATABRICKS_COMPUTE_MODE=serverless`): jobs submit to Databricks serverless compute with a job-level environment spec. No cluster ID needed; wheels attach as `Environment.dependencies` entries (UC Volume paths are supported directly).
108
+
109
+ ### Example `.env` (classic cluster)
110
+
111
+ ```
112
+ DATABRICKS_PROFILE=my-profile
113
+ DATABRICKS_CLUSTER_ID=0123-456789-abcdef
114
+ DATABRICKS_WORKSPACE_DIR=/Users/ryan.knight@example.com/my_project
115
+ DATABRICKS_VOLUME_PATH=/Volumes/catalog/schema/volume
116
+ NEO4J_URI=neo4j+s://abc123.databases.neo4j.io
117
+ NEO4J_PASSWORD=secret
118
+ ```
119
+
120
+ ### Example `.env` (serverless)
121
+
122
+ ```
123
+ DATABRICKS_PROFILE=my-profile
124
+ DATABRICKS_COMPUTE_MODE=serverless
125
+ DATABRICKS_SERVERLESS_ENV_VERSION=3
126
+ DATABRICKS_WORKSPACE_DIR=/Users/ryan.knight@example.com/my_project
127
+ DATABRICKS_VOLUME_PATH=/Volumes/catalog/schema/volume
128
+ ```
129
+
130
+ All `DATABRICKS_*` keys listed above become typed fields on `RunnerConfig`; any other keys (like `NEO4J_URI` above) go into `config.extras`.
131
+
132
+ ## API
133
+
134
+ ### `Runner`
135
+
136
+ ```python
137
+ Runner(
138
+ run_name_prefix: str,
139
+ build_params: BuildParamsFn | None = None,
140
+ project_dir: Path | str | None = None,
141
+ wheel_package: str | None = None,
142
+ )
143
+ ```
144
+
145
+ | Parameter | Description |
146
+ |-----------|-------------|
147
+ | `run_name_prefix` | Prefix for job run names and cleanup filtering |
148
+ | `build_params` | Callback `(config: RunnerConfig, script: str) -> list[str]` that builds CLI args from typed config |
149
+ | `project_dir` | Project root (defaults to `cwd()`). Must contain `.env` and `agent_modules/` |
150
+ | `wheel_package` | Package name for wheel builds. Enables `upload --wheel`. Wheels upload to `<DATABRICKS_VOLUME_PATH>/wheels/` |
151
+
152
+ ### `RunnerConfig`
153
+
154
+ Pydantic model holding parsed `.env` values. Frozen (immutable) after construction.
155
+
156
+ | Field | Type | Description |
157
+ |-------|------|-------------|
158
+ | `databricks_profile` | `str \| None` | CLI profile name, or `None` for unified-auth fallback |
159
+ | `databricks_compute_mode` | `Literal["cluster", "serverless"]` | Compute backend (`"cluster"` by default) |
160
+ | `databricks_cluster_id` | `str \| None` | Cluster ID (required when `databricks_compute_mode == "cluster"`) |
161
+ | `databricks_serverless_env_version` | `str` | Serverless environment version (default `"3"`) |
162
+ | `databricks_workspace_dir` | `str` | Remote workspace root (required) |
163
+ | `databricks_volume_path` | `str \| None` | UC Volume path for wheel uploads |
164
+ | `extras` | `dict[str, str]` | All non-core keys from `.env` |
165
+
166
+ ### `BuildParamsFn`
167
+
168
+ ```python
169
+ type BuildParamsFn = Callable[[RunnerConfig, str], list[str]]
170
+ ```
171
+
172
+ Type alias for the `build_params` callback. The second argument is the script name being submitted, enabling per-script parameter injection.
173
+
174
+ ### `RunnerError`
175
+
176
+ Raised when a runner operation cannot proceed (missing config, file not found, cluster stopped, job failed). The CLI formats and exits; library callers can catch and handle.
177
+
178
+ ## Project layout
179
+
180
+ The runner expects this layout in your project:
181
+
182
+ ```
183
+ my_project/
184
+ .env
185
+ agent_modules/
186
+ test_hello.py
187
+ run_lab2.py
188
+ ...
189
+ cli/
190
+ __init__.py # Runner config
191
+ __main__.py # entry point
192
+ ```
193
+
194
+ Scripts in `agent_modules/` are uploaded to `{DATABRICKS_WORKSPACE_DIR}/agent_modules/` on Databricks and submitted as Spark Python tasks.
195
+
196
+ ## Subcommands
197
+
198
+ ### `upload`
199
+
200
+ - **`upload <file>`** — Upload a single file from `agent_modules/`
201
+ - **`upload --all`** — Upload all `*.py` files from `agent_modules/`
202
+ - **`upload --wheel`** — Build a wheel with `uv build` and upload to the UC Volume (requires `wheel_package` and `DATABRICKS_VOLUME_PATH`)
203
+
204
+ ### `submit`
205
+
206
+ - **`submit <script>`** — Submit a script as a one-time Databricks job and wait for completion. Default: `test_hello.py`
207
+ - **`submit <script> --no-wait`** — Submit without waiting
208
+
209
+ On classic mode, if the target cluster is not already `RUNNING`, it is started automatically and the submit waits (up to 20 minutes, the SDK default) for it to reach `RUNNING`. On serverless, no warm-up step is required. When submitting a script named `run_{wheel_package}.py`, the runner automatically attaches the wheel — as a `Library(whl=...)` on classic, or as an `Environment.dependencies` entry on serverless.
210
+
211
+ ### `validate`
212
+
213
+ - **`validate`** — List the remote workspace directory and its `agent_modules/` subdirectory. On classic, auto-starts the cluster if needed; on serverless, this is a no-op.
214
+ - **`validate <file>`** — Also verify that `{DATABRICKS_WORKSPACE_DIR}/agent_modules/<file>` exists; exits non-zero if not.
215
+
216
+ ### `logs`
217
+
218
+ - **`logs`** — Print stdout/stderr, error, and trace from the most recent run matching `{run_name_prefix}:*`
219
+ - **`logs <run_id>`** — Print output for a specific parent run ID
220
+
221
+ Output is fetched via the Jobs API's `get_run_output`, which returns the **tail 5 MB** of stdout/stderr captured per task (the API caps output size; truncation is signaled in the output). The runner resolves the parent run to its task-level run IDs automatically, so pass the parent `run_id` shown at submit time. Databricks auto-expires runs after 60 days.
222
+
223
+ ### `clean`
224
+
225
+ - **`clean`** — Delete the remote workspace directory and all matching job runs
226
+ - **`clean --workspace`** — Delete only the workspace directory
227
+ - **`clean --runs`** — Delete only job runs
228
+ - **`clean --yes`** — Skip confirmation prompt
229
+
230
+ ## Requirements
231
+
232
+ - Python 3.12+
233
+ - Databricks authentication: either a [Databricks CLI profile](https://docs.databricks.com/dev-tools/cli/index.html), or env vars (`DATABRICKS_HOST`/`DATABRICKS_TOKEN`), or any other [unified-auth](https://docs.databricks.com/dev-tools/auth/) method
234
+ - Either a Databricks all-purpose cluster (auto-started if terminated) or serverless compute enabled for the workspace
235
+ - [uv](https://docs.astral.sh/uv/) (for wheel building only)
236
+
237
+ ## Architecture
238
+
239
+ `databricks-job-runner` is layered into a thin CLI, an orchestrator, and a set of single-purpose action modules. `Runner` is the only class that consuming projects need to touch.
240
+
241
+ ```
242
+ cli.py argparse + dispatch (flags -> Runner method calls)
243
+ |
244
+ runner.py Runner: holds config, owns the WorkspaceClient,
245
+ | exposes one method per subcommand
246
+ |
247
+ |-- config.py RunnerConfig (frozen pydantic) + .env parser
248
+ |-- compute.py ClassicCluster / Serverless strategies (Protocol)
249
+ |-- upload.py workspace file + wheel upload
250
+ |-- submit.py compute-agnostic job submission
251
+ |-- validate.py workspace listing + file-existence checks
252
+ |-- logs.py per-task stdout/stderr retrieval
253
+ |-- clean.py workspace + run cleanup
254
+ |-- errors.py RunnerError
255
+ ```
256
+
257
+ ### Layers
258
+
259
+ - **CLI (`cli.py`)** owns all argparse setup and translates the parsed namespace into method calls on `Runner`. Formats `RunnerError` into friendly exit messages. No argparse knowledge lives outside this file.
260
+ - **Orchestration (`runner.py`)** exposes the `Runner` class. `RunnerConfig` and the `WorkspaceClient` are built lazily on first access, so importing a project's `cli/__init__.py` doesn't touch Databricks. Each public method coordinates a single subcommand end-to-end.
261
+ - **Action modules** (`upload.py`, `submit.py`, `validate.py`, `logs.py`, `clean.py`) are plain functions wrapping SDK calls. None know about argparse or `Runner`, keeping each unit composable and independently testable.
262
+ - **Compute strategies (`compute.py`)** implement the `Compute` protocol. A strategy knows how to (1) validate that its backend is ready, (2) decorate a `SubmitTask` with backend-specific fields, and (3) produce the top-level `environments[]` list for `jobs.submit`. `submit_job` is compute-agnostic — swapping backends is a strategy change, not a conditional branch.
263
+
264
+ ### Design choices
265
+
266
+ - **Strategy pattern for compute.** `Compute` is a `typing.Protocol`, so adding a new backend is a new frozen dataclass that matches the shape — no changes to `submit_job`, `Runner`, or the CLI. `ClassicCluster` and `Serverless` are both frozen dataclasses for value-equality and immutability.
267
+ - **Single validation point.** Required-key enforcement lives entirely in `RunnerConfig.from_env_file`, branching on `DATABRICKS_COMPUTE_MODE` (only `DATABRICKS_CLUSTER_ID` is required when mode is `cluster`). Downstream code trusts the config is valid.
268
+ - **`build_params` callback.** Project-specific config stays in the consumer's callback rather than the runner's `.env` schema. Core `DATABRICKS_*` keys are typed on `RunnerConfig`; everything else falls into `RunnerConfig.extras` for the callback to read.
269
+ - **Wheel convention.** A submitted script named exactly `run_{wheel_package}.py` auto-attaches the latest wheel from `dist/` — as `Library(whl=...)` on classic, or an `Environment.dependencies` entry on serverless. Ties `upload --wheel` and `submit run_xxx.py` together without adding a CLI flag.
270
+ - **12-factor `.env`.** Pre-existing env vars override `.env` values, so CI/CD exports and shell overrides trump the file — matching standard `.env` semantics.
@@ -0,0 +1,32 @@
1
+ [project]
2
+ name = "databricks-job-runner"
3
+ version = "0.3.0"
4
+ description = "Reusable CLI for uploading, submitting, validating, fetching logs, and cleaning Databricks job runs"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ authors = [
8
+ { name = "Ryan Knight", email = "ryan.knight@neo4j.com" }
9
+ ]
10
+ requires-python = ">=3.12"
11
+ dependencies = [
12
+ "databricks-sdk",
13
+ "pydantic>=2",
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Intended Audience :: Developers",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Typing :: Typed",
22
+ ]
23
+
24
+ [project.urls]
25
+ Repository = "https://github.com/neo4j-partners/databricks-job-runner"
26
+
27
+ [build-system]
28
+ requires = ["uv_build>=0.9.3,<0.10.0"]
29
+ build-backend = "uv_build"
30
+
31
+ [tool.uv.workspace]
32
+ members = ["examples/serverless_smoke"]
@@ -0,0 +1,16 @@
1
+ """Reusable CLI for uploading, submitting, validating, fetching logs, and cleaning Databricks job runs."""
2
+
3
+ from databricks_job_runner.compute import ClassicCluster, Compute, Serverless
4
+ from databricks_job_runner.config import RunnerConfig
5
+ from databricks_job_runner.errors import RunnerError
6
+ from databricks_job_runner.runner import BuildParamsFn, Runner
7
+
8
+ __all__ = [
9
+ "BuildParamsFn",
10
+ "ClassicCluster",
11
+ "Compute",
12
+ "Runner",
13
+ "RunnerConfig",
14
+ "RunnerError",
15
+ "Serverless",
16
+ ]
@@ -0,0 +1,40 @@
1
+ """Clean up remote workspace directories and job runs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from databricks.sdk import WorkspaceClient
6
+ from databricks.sdk.errors import NotFound
7
+ from databricks.sdk.service.jobs import RunType
8
+
9
+
10
+ def clean_workspace(ws: WorkspaceClient, workspace_dir: str) -> None:
11
+ """Delete the remote workspace directory recursively."""
12
+ print(f"Deleting remote workspace: {workspace_dir}")
13
+ try:
14
+ ws.workspace.delete(path=workspace_dir, recursive=True)
15
+ print(" Done.")
16
+ except NotFound:
17
+ print(" Directory does not exist or already deleted.")
18
+
19
+
20
+ def clean_runs(ws: WorkspaceClient, run_name_prefix: str) -> None:
21
+ """Find and delete all one-time job runs whose name starts with *prefix*."""
22
+ print(f"Finding job runs matching '{run_name_prefix}*'...")
23
+
24
+ deleted = 0
25
+ for run in ws.jobs.list_runs(run_type=RunType.SUBMIT_RUN, expand_tasks=False):
26
+ run_name = run.run_name or ""
27
+ run_id = run.run_id
28
+ if run_id is None or not run_name.startswith(run_name_prefix):
29
+ continue
30
+ print(f" Deleting run {run_id} ({run_name})")
31
+ try:
32
+ ws.jobs.delete_run(run_id)
33
+ deleted += 1
34
+ except NotFound:
35
+ print(f" Run {run_id} already deleted.")
36
+
37
+ if deleted:
38
+ print(f" Deleted {deleted} run(s).")
39
+ else:
40
+ print(" No matching runs found.")